From 56c8ecffc1f84f630e10f775bc29fcf4c743a3c9 Mon Sep 17 00:00:00 2001
From: Yinan Zhang
Date: Thu, 6 Jun 2019 09:22:10 -0700
Subject: Correct tsd layout graph

Augmented the tsd layout graph so that the two recently added fields,
`offset_state` and `bytes_until_sample`, are properly reflected.
As is shown, the cache footprint is 16 bytes larger than before.
---
 include/jemalloc/internal/tsd.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
index 9ba2600..18b2476 100644
--- a/include/jemalloc/internal/tsd.h
+++ b/include/jemalloc/internal/tsd.h
@@ -20,6 +20,7 @@
  * e: tcache_enabled
  * m: thread_allocated (config_stats)
  * f: thread_deallocated (config_stats)
+ * b: bytes_until_sample (config_prof)
  * p: prof_tdata (config_prof)
  * c: rtree_ctx (rtree cache accessed on deallocation)
  * t: tcache
@@ -27,6 +28,7 @@
  * d: arenas_tdata_bypass
  * r: reentrancy_level
  * x: narenas_tdata
+ * v: offset_state
  * i: iarena
  * a: arena
  * o: arenas_tdata
@@ -35,11 +37,13 @@
  * Use a compact layout to reduce cache footprint.
  * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
  * |---------------------------- 1st cacheline ----------------------------|
- * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
+ * | sedrxxxx vvvvvvvv mmmmmmmm ffffffff bbbbbbbb pppppppp [c * 16 .......] |
  * |---------------------------- 2nd cacheline ----------------------------|
  * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
  * |---------------------------- 3rd cacheline ----------------------------|
- * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
+ * | [c * 48 ........ ........ ........ ........ .......] iiiiiiii aaaaaaaa |
+ * +---------------------------- 4th cacheline ----------------------------+
+ * | oooooooo [t...... ........ ........ ........ ........ ........ ........ |
  * +-------------------------------------------------------------------------+
  * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
  *
--
cgit v0.12


From 56126d0d2d0730acde6416cf02efdb9ed19d578b Mon Sep 17 00:00:00 2001
From: Yinan Zhang
Date: Fri, 12 Jul 2019 16:37:37 -0700
Subject: Refactor prof log

Prof logging is conceptually separate from core profiling, so split it
out as a module of its own.  There are a few internal functions that
had to be exposed, but I think it is a fair trade-off.
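As a usage sketch (not part of this patch): once the log module is wired up, an
application can drive prof_log_start()/prof_log_stop() through mallctl(). Only
the internal prof_log_* entry points appear in this patch, so the control names
"prof.log_start"/"prof.log_stop", the write semantics (a filename pointer,
mirroring "prof.dump"), and the unprefixed mallctl symbol are assumptions here,
and "prof_log.json" is an arbitrary example path. Profiling itself must be
enabled; prof_log_start() bails out when !opt_prof || !prof_booted, as the
moved code below shows.

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Build/run sketch: cc demo.c -ljemalloc && MALLOC_CONF="prof:true" ./a.out */
int
main(void) {
	/*
	 * Begin logging.  The filename is passed the way "prof.dump" takes
	 * one (an assumption); passing newp=NULL would instead let
	 * prof_log_start() build the default "<prefix>.<pid>.<seq>.json"
	 * name shown in the moved code.
	 */
	const char *log_path = "prof_log.json";	/* hypothetical path */
	if (mallctl("prof.log_start", NULL, NULL, (void *)&log_path,
	    sizeof(const char *)) != 0) {
		return 1;	/* e.g. profiling off, or logging already started */
	}

	/* Large allocations are likely to be sampled and recorded. */
	void *p = malloc(1 << 20);
	free(p);

	/*
	 * Stop logging: the state machine goes started -> dumping -> stopped,
	 * and prof_log_stop() writes out the JSON file.
	 */
	mallctl("prof.log_stop", NULL, NULL, NULL, 0);
	return 0;
}

Alternatively, enabling opt_prof_log at startup (presumably via a prof_log
MALLOC_CONF option matching the opt_prof_log variable moved in this patch)
makes prof_log_init() start a log during boot instead.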
--- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 8 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 682 +------------------- src/prof_log.c | 698 +++++++++++++++++++++ 8 files changed, 720 insertions(+), 677 deletions(-) create mode 100644 src/prof_log.c diff --git a/Makefile.in b/Makefile.in index 7128b00..1cd973d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 094f3e1..e94ac3b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,6 +43,8 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; +extern bool prof_booted; + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -64,10 +66,14 @@ extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif +int prof_getpid(void); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); @@ -91,8 +97,10 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); +bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 228e8be..d93d909 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d839515..7b09d4e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index edcceed..28bd3cd 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 6df7260..a66c209 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ 
Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 13334cb..7efa20d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,7 +7,6 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -39,7 +38,6 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; -bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -72,100 +70,6 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -typedef enum prof_logging_state_e prof_logging_state_t; -enum prof_logging_state_e { - prof_logging_state_stopped, - prof_logging_state_started, - prof_logging_state_dumping -}; - -/* - * - stopped: log_start never called, or previous log_stop has completed. - * - started: log_start called, log_stop not called yet. Allocations are logged. - * - dumping: log_stop called but not finished; samples are not logged anymore. - */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; - -#ifdef JEMALLOC_JET -static bool prof_log_dummy = false; -#endif - -/* Incremented for every log file that is output. */ -static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; - -/* Increment these when adding to the log_bt and log_thr linked lists. */ -static size_t log_bt_index = 0; -static size_t log_thr_index = 0; - -/* Linked list node definitions. These are only used in prof.c. */ -typedef struct prof_bt_node_s prof_bt_node_t; - -struct prof_bt_node_s { - prof_bt_node_t *next; - size_t index; - prof_bt_t bt; - /* Variable size backtrace vector pointed to by bt. */ - void *vec[1]; -}; - -typedef struct prof_thr_node_s prof_thr_node_t; - -struct prof_thr_node_s { - prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; - /* Variable size based on thr_name_sz. */ - char name[1]; -}; - -typedef struct prof_alloc_node_s prof_alloc_node_t; - -/* This is output when logging sampled allocations. */ -struct prof_alloc_node_s { - prof_alloc_node_t *next; - /* Indices into an array of thread data. */ - size_t alloc_thr_ind; - size_t free_thr_ind; - - /* Indices into an array of backtraces. */ - size_t alloc_bt_ind; - size_t free_bt_ind; - - uint64_t alloc_time_ns; - uint64_t free_time_ns; - - size_t usize; -}; - -/* - * Created on the first call to prof_log_start and deleted on prof_log_stop. - * These are the backtraces and threads that have already been logged by an - * allocation. - */ -static bool log_tables_initialized = false; -static ckh_t log_bt_node_set; -static ckh_t log_thr_node_set; - -/* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; -static prof_alloc_node_t *log_alloc_first = NULL; -static prof_alloc_node_t *log_alloc_last = NULL; - -/* Protects the prof_logging_state and any log_{...} variable. */ -static malloc_mutex_t log_mtx; - /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. 
@@ -225,7 +129,7 @@ static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ -static bool prof_booted = false; +bool prof_booted = false; /******************************************************************************/ /* @@ -241,12 +145,6 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); -/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ -static void prof_thr_node_hash(const void *key, size_t r_hash[2]); -static bool prof_thr_node_keycomp(const void *k1, const void *k2); -static void prof_bt_node_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_node_keycomp(const void *k1, const void *k2); - /******************************************************************************/ /* Red-black trees. */ @@ -361,162 +259,6 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } -static size_t -prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_bt_node_t dummy_node; - dummy_node.bt = *bt; - prof_bt_node_t *node; - - /* See if this backtrace is already cached in the table. */ - if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_bt_first == NULL) { - log_bt_first = new_node; - log_bt_last = new_node; - } else { - log_bt_last->next = new_node; - log_bt_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_bt_index; - /* - * Copy the backtrace: bt is inside a tdata or gctx, which - * might die before prof_log_stop is called. - */ - new_node->bt.len = bt->len; - memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); - new_node->bt.vec = new_node->vec; - - log_bt_index++; - ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} -static size_t -prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_thr_node_t dummy_node; - dummy_node.thr_uid = thr_uid; - prof_thr_node_t *node; - - /* See if this thread is already cached in the table. 
*/ - if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_thr_first == NULL) { - log_thr_first = new_node; - log_thr_last = new_node; - } else { - log_thr_last->next = new_node; - log_thr_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_thr_index; - new_node->thr_uid = thr_uid; - strcpy(new_node->name, name); - - log_thr_index++; - ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} - -static void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); - if (cons_tdata == NULL) { - /* - * We decide not to log these allocations. cons_tdata will be - * NULL only when the current thread is in a weird state (e.g. - * it's being destroyed). - */ - return; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - goto label_done; - } - - if (!log_tables_initialized) { - bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); - bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); - if (err1 || err2) { - goto label_done; - } - log_tables_initialized = true; - } - - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); - - size_t sz = sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; - const char *cons_thr_name = prof_thread_name_get(tsd); - - prof_bt_t bt; - /* Initialize the backtrace, using the buffer in tdata to store it. */ - bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); - prof_bt_t *cons_bt = &bt; - - /* We haven't destroyed tctx yet, so gctx should be good to read. */ - prof_bt_t *prod_bt = &tctx->gctx->bt; - - new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); - new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); - new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); - new_node->alloc_time_ns = nstime_ns(&alloc_time); - new_node->free_time_ns = nstime_ns(&free_time); - new_node->usize = usize; - - if (log_alloc_first == NULL) { - log_alloc_first = new_node; - log_alloc_last = new_node; - } else { - log_alloc_last->next = new_node; - log_alloc_last = new_node; - } - -label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); -} - void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { @@ -1694,7 +1436,7 @@ prof_open_maps(const char *format, ...) 
{ } #endif -static int +int prof_getpid(void) { #ifdef _WIN32 return GetCurrentProcessId(); @@ -2136,7 +1878,7 @@ prof_gdump(tsdn_t *tsdn) { } } -static void +void prof_bt_hash(const void *key, size_t r_hash[2]) { prof_bt_t *bt = (prof_bt_t *)key; @@ -2145,7 +1887,7 @@ prof_bt_hash(const void *key, size_t r_hash[2]) { hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); } -static bool +bool prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; @@ -2158,33 +1900,6 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static void -prof_bt_node_hash(const void *key, size_t r_hash[2]) { - const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; - prof_bt_hash((void *)(&bt_node->bt), r_hash); -} - -static bool -prof_bt_node_keycomp(const void *k1, const void *k2) { - const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; - const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void *)(&bt_node2->bt)); -} - -static void -prof_thr_node_hash(const void *key, size_t r_hash[2]) { - const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; - hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); -} - -static bool -prof_thr_node_keycomp(const void *k1, const void *k2) { - const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; - const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; - return thr_node1->thr_uid == thr_node2->thr_uid; -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2417,368 +2132,6 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } -#ifdef JEMALLOC_JET -size_t -prof_log_bt_count(void) { - size_t cnt = 0; - prof_bt_node_t *node = log_bt_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_alloc_count(void) { - size_t cnt = 0; - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_thr_count(void) { - size_t cnt = 0; - prof_thr_node_t *node = log_thr_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -bool -prof_log_is_logging(void) { - return prof_logging_state == prof_logging_state_started; -} - -bool -prof_log_rep_check(void) { - if (prof_logging_state == prof_logging_state_stopped - && log_tables_initialized) { - return true; - } - - if (log_bt_last != NULL && log_bt_last->next != NULL) { - return true; - } - if (log_thr_last != NULL && log_thr_last->next != NULL) { - return true; - } - if (log_alloc_last != NULL && log_alloc_last->next != NULL) { - return true; - } - - size_t bt_count = prof_log_bt_count(); - size_t thr_count = prof_log_thr_count(); - size_t alloc_count = prof_log_alloc_count(); - - - if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { - return true; - } - } - - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - if (node->alloc_bt_ind >= bt_count) { - return true; - } - if (node->free_bt_ind >= bt_count) { - return true; - } - if (node->alloc_thr_ind >= thr_count) { - return true; - } - if (node->free_thr_ind >= thr_count) { - return true; - } - if (node->alloc_time_ns > node->free_time_ns) { - return true; - } - node = node->next; - } - - return false; -} - -void -prof_log_dummy_set(bool new_value) { - prof_log_dummy = new_value; -} -#endif - -bool 
-prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { - return true; - } - - bool ret = false; - size_t buf_size = PATH_MAX + 1; - - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_stopped) { - ret = true; - } else if (filename == NULL) { - /* Make default name. */ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); - log_seq++; - prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { - ret = true; - } else { - strcpy(log_filename, filename); - prof_logging_state = prof_logging_state_started; - } - - if (!ret) { - nstime_update(&log_start_timestamp); - } - - malloc_mutex_unlock(tsdn, &log_mtx); - - return ret; -} - -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - -struct prof_emitter_cb_arg_s { - int fd; - ssize_t ret; -}; - -static void -prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; - size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return; - } -#endif - arg->ret = write(arg->fd, (void *)to_write, bytes); -} - -/* - * prof_log_emit_{...} goes through the appropriate linked list, emitting each - * node to the json and deallocating it. - */ -static void -prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "threads"); - prof_thr_node_t *thr_node = log_thr_first; - prof_thr_node_t *thr_old_node; - while (thr_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, - &thr_node->thr_uid); - - char *thr_name = thr_node->name; - - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); - - emitter_json_object_end(emitter); - thr_old_node = thr_node; - thr_node = thr_node->next; - idalloc(tsd, thr_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "stack_traces"); - prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* - * Calculate how many hex digits we need: twice number of bytes, two for - * "0x", and then one more for terminating '\0'. 
- */ - char buf[2 * sizeof(intptr_t) + 3]; - size_t buf_sz = sizeof(buf); - while (bt_node != NULL) { - emitter_json_array_begin(emitter); - size_t i; - for (i = 0; i < bt_node->bt.len; i++) { - malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); - char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); - } - emitter_json_array_end(emitter); - - bt_old_node = bt_node; - bt_node = bt_node->next; - idalloc(tsd, bt_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "allocations"); - prof_alloc_node_t *alloc_node = log_alloc_first; - prof_alloc_node_t *alloc_old_node; - while (alloc_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "alloc_thread", emitter_type_size, - &alloc_node->alloc_thr_ind); - - emitter_json_kv(emitter, "free_thread", emitter_type_size, - &alloc_node->free_thr_ind); - - emitter_json_kv(emitter, "alloc_trace", emitter_type_size, - &alloc_node->alloc_bt_ind); - - emitter_json_kv(emitter, "free_trace", emitter_type_size, - &alloc_node->free_bt_ind); - - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); - - emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, - &alloc_node->free_time_ns); - - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); - - emitter_json_object_end(emitter); - - alloc_old_node = alloc_node; - alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_metadata(emitter_t *emitter) { - emitter_json_object_kv_begin(emitter, "info"); - - nstime_t now = NSTIME_ZERO_INITIALIZER; - - nstime_update(&now); - uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); - emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); - - char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); - - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); - - int pid = prof_getpid(); - emitter_json_kv(emitter, "pid", emitter_type_int, &pid); - - emitter_json_object_end(emitter); -} - - -bool -prof_log_stop(tsdn_t *tsdn) { - if (!opt_prof || !prof_booted) { - return true; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - malloc_mutex_unlock(tsdn, &log_mtx); - return true; - } - - /* - * Set the state to dumping. We'll set it to stopped when we're done. - * Since other threads won't be able to start/stop/log when the state is - * dumping, we don't have to hold the lock during the whole method. - */ - prof_logging_state = prof_logging_state_dumping; - malloc_mutex_unlock(tsdn, &log_mtx); - - - emitter_t emitter; - - /* Create a file. */ - - int fd; -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - fd = 0; - } else { - fd = creat(log_filename, 0644); - } -#else - fd = creat(log_filename, 0644); -#endif - - if (fd == -1) { - malloc_printf(": creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); - if (opt_abort) { - abort(); - } - return true; - } - - /* Emit to json. 
*/ - struct prof_emitter_cb_arg_s arg; - arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); - - emitter_begin(&emitter); - prof_log_emit_metadata(&emitter); - prof_log_emit_threads(tsd, &emitter); - prof_log_emit_traces(tsd, &emitter); - prof_log_emit_allocs(tsd, &emitter); - emitter_end(&emitter); - - /* Reset global state. */ - if (log_tables_initialized) { - ckh_delete(tsd, &log_bt_node_set); - ckh_delete(tsd, &log_thr_node_set); - } - log_tables_initialized = false; - log_bt_index = 0; - log_thr_index = 0; - log_bt_first = NULL; - log_bt_last = NULL; - log_thr_first = NULL; - log_thr_last = NULL; - log_alloc_first = NULL; - log_alloc_last = NULL; - - malloc_mutex_lock(tsdn, &log_mtx); - prof_logging_state = prof_logging_state_stopped; - malloc_mutex_unlock(tsdn, &log_mtx); - -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return false; - } -#endif - return close(fd); -} - const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -3015,35 +2368,10 @@ prof_boot2(tsd_t *tsd) { } } - if (opt_prof_log) { - prof_log_start(tsd_tsdn(tsd), NULL); - } - - if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { - return true; - } - - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { + if (prof_log_init(tsd)) { return true; } - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp)) { - return true; - } - - log_tables_initialized = true; - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); diff --git a/src/prof_log.c b/src/prof_log.c new file mode 100644 index 0000000..56d4e03 --- /dev/null +++ b/src/prof_log.c @@ -0,0 +1,698 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" + +bool opt_prof_log = false; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in this file. 
*/ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. */ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. */ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. + * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + +/******************************************************************************/ + +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. 
+ */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. */ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). + */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? + "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void *)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_thr_count(void) { + size_t cnt = 0; + prof_thr_node_t *node = log_thr_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +bool +prof_log_is_logging(void) { + return prof_logging_state == prof_logging_state_started; +} + +bool +prof_log_rep_check(void) { + if (prof_logging_state == prof_logging_state_stopped + && log_tables_initialized) { + return true; + } + + if (log_bt_last != NULL && log_bt_last->next != NULL) { + return true; + } + if (log_thr_last != NULL && log_thr_last->next != NULL) { + return true; + } + if (log_alloc_last != NULL && log_alloc_last->next != NULL) { + return true; + } + + size_t bt_count = prof_log_bt_count(); + size_t thr_count = prof_log_thr_count(); + size_t alloc_count = prof_log_alloc_count(); + + + if (prof_logging_state == prof_logging_state_stopped) { + if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + return true; + } + } + + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + if (node->alloc_bt_ind >= bt_count) { + return true; + } + if (node->free_bt_ind >= bt_count) { + return true; + } + if (node->alloc_thr_ind >= thr_count) { + return true; + } + if (node->free_thr_ind >= thr_count) { + return true; + } + if (node->alloc_time_ns > node->free_time_ns) { + return true; + } + node = node->next; + } + + return false; +} + +void +prof_log_dummy_set(bool new_value) { + prof_log_dummy = new_value; +} +#endif + +bool +prof_log_start(tsdn_t *tsdn, const char *filename) { + if (!opt_prof || !prof_booted) { + return 
true; + } + + bool ret = false; + size_t buf_size = PATH_MAX + 1; + + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_stopped) { + ret = true; + } else if (filename == NULL) { + /* Make default name. */ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. + */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. 
+ */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, "version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. */ + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif + + if (fd == -1) { + malloc_printf(": creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. 
 */
+	struct prof_emitter_cb_arg_s arg;
+	arg.fd = fd;
+	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
+	    (void *)(&arg));
+
+	emitter_begin(&emitter);
+	prof_log_emit_metadata(&emitter);
+	prof_log_emit_threads(tsd, &emitter);
+	prof_log_emit_traces(tsd, &emitter);
+	prof_log_emit_allocs(tsd, &emitter);
+	emitter_end(&emitter);
+
+	/* Reset global state. */
+	if (log_tables_initialized) {
+		ckh_delete(tsd, &log_bt_node_set);
+		ckh_delete(tsd, &log_thr_node_set);
+	}
+	log_tables_initialized = false;
+	log_bt_index = 0;
+	log_thr_index = 0;
+	log_bt_first = NULL;
+	log_bt_last = NULL;
+	log_thr_first = NULL;
+	log_thr_last = NULL;
+	log_alloc_first = NULL;
+	log_alloc_last = NULL;
+
+	malloc_mutex_lock(tsdn, &log_mtx);
+	prof_logging_state = prof_logging_state_stopped;
+	malloc_mutex_unlock(tsdn, &log_mtx);
+
+#ifdef JEMALLOC_JET
+	if (prof_log_dummy) {
+		return false;
+	}
+#endif
+	return close(fd);
+}
+
+bool prof_log_init(tsd_t *tsd) {
+	if (opt_prof_log) {
+		prof_log_start(tsd_tsdn(tsd), NULL);
+	}
+
+	if (atexit(prof_log_stop_final) != 0) {
+		malloc_write("<jemalloc>: Error in atexit() "
+		    "for logging\n");
+		if (opt_abort) {
+			abort();
+		}
+	}
+
+	if (malloc_mutex_init(&log_mtx, "prof_log",
+	    WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
+		return true;
+	}
+
+	if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
+	    prof_bt_node_hash, prof_bt_node_keycomp)) {
+		return true;
+	}
+
+	if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
+	    prof_thr_node_hash, prof_thr_node_keycomp)) {
+		return true;
+	}
+
+	log_tables_initialized = true;
+	return false;
+}
+
+/******************************************************************************/
--
cgit v0.12


From 07ce2434bf45420ff9d9d22590f68540c6dd7b78 Mon Sep 17 00:00:00 2001
From: Yinan Zhang
Date: Wed, 17 Jul 2019 15:52:50 -0700
Subject: Refactor profiling

Refactored the core profiling codebase into two logical parts:

(a) `prof_data.c`: core internal data structure management & dumping;
(b) `prof.c`: mutexes & outward-facing APIs.

Some internal functions had to be exposed, but there are not that many
of them if the modularization is (hopefully) clean enough.
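To make the intended layering concrete, here is a small standalone model of
the split (illustration only; it is not jemalloc code, and every name in it is
a stand-in): the "prof_data" half owns the profiling data and knows how to
dump it, while the "prof" half owns options, sequence numbers, and a mutex,
and is the only outward-facing surface, delegating through a single call the
way prof.c delegates to prof_dump() in prof_data.c after this patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

/* --- the "prof_data.c" side: internal state plus one dump entry point. --- */

static uint64_t dump_count;	/* stand-in for the gctx/tdata trees */

/* cf. prof_dump() in prof_data.c; true means failure, jemalloc-style. */
static bool
prof_data_dump_model(const char *filename) {
	FILE *f = fopen(filename, "w");
	if (f == NULL) {
		return true;
	}
	fprintf(f, "dump #%llu\n", (unsigned long long)++dump_count);
	return fclose(f) != 0;
}

/* --- the "prof.c" side: options, mutexes, outward-facing wrapper. --- */

static bool prof_booted_model = true;		/* set at the end of boot */
static const char *prefix_model = "jeprof";	/* cf. opt_prof_prefix */
static uint64_t mseq_model;			/* cf. prof_dump_mseq */
static pthread_mutex_t seq_mtx_model = PTHREAD_MUTEX_INITIALIZER;

/* cf. prof_mdump(): check state, sequence a default name, delegate. */
static bool
prof_mdump_model(const char *filename) {
	char buf[256];

	if (!prof_booted_model) {
		return true;
	}
	if (filename == NULL) {
		/* Name sequencing stays on the "prof.c" side, under its mutex;
		 * the format is a simplified version of prof_dump_filename(). */
		pthread_mutex_lock(&seq_mtx_model);
		snprintf(buf, sizeof(buf), "%s.%d.m%llu.heap", prefix_model,
		    (int)getpid(), (unsigned long long)mseq_model++);
		pthread_mutex_unlock(&seq_mtx_model);
		filename = buf;
	}
	/* Everything that walks profiling data stays behind this one call. */
	return prof_data_dump_model(filename);
}

int
main(void) {
	return prof_mdump_model(NULL) ? 1 : 0;
}

In the real patch, the boundary is the short list added to prof_externs.h under
"Functions in prof_data.c only accessed in prof.c": prof_data_init(),
prof_dump(), prof_tdata_init_impl(), prof_tdata_detach(), and
prof_tctx_destroy().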
--- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 14 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 1607 ++------------------ src/prof_data.c | 1441 ++++++++++++++++++ 8 files changed, 1561 insertions(+), 1510 deletions(-) create mode 100644 src/prof_data.c diff --git a/Makefile.in b/Makefile.in index 1cd973d..40daf11 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e94ac3b..8fc45cf 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -4,6 +4,11 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; + +malloc_mutex_t *prof_gctx_mutex_choose(void); +malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); extern bool opt_prof; extern bool opt_prof_active; @@ -110,4 +115,13 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif +/* Functions in prof_data.c only accessed in prof.c */ +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d93d909..387f14b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7b09d4e..030d826 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 28bd3cd..1606a3a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index a66c209..622b93f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 7efa20d..79a0ffc 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3,11 +3,14 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +/* + * This file implements the profiling "APIs" needed by other parts of 
jemalloc, + * and also manages the relevant "operational" data, mainly options and mutexes; + * the core profiling data structures are encapsulated in prof_data.c. + */ + /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -88,20 +91,10 @@ static atomic_u_t cum_gctxs; /* Atomic counter. */ */ static malloc_mutex_t *tdata_locks; -/* - * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2gctx; /* Non static to enable profiling. */ malloc_mutex_t bt2gctx_mtx; -/* - * Tree of all extant prof_tdata_t structures, regardless of state, - * {attached,detached,expired}. - */ -static prof_tdata_tree_t tdatas; -static malloc_mutex_t tdatas_mtx; +malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -112,101 +105,29 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static malloc_mutex_t prof_dump_mtx; -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +malloc_mutex_t prof_dump_mtx; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ -static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); -static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached); -static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +static bool +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); -/******************************************************************************/ -/* Red-black trees. */ - -static int -prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_thr_uid = a->thr_uid; - uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); - if (ret == 0) { - uint64_t a_thr_discrim = a->thr_discrim; - uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); - if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); - } + if (opt_prof_accum) { + return false; } - return ret; -} - -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) - -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { - unsigned a_len = a->bt.len; - unsigned b_len = b->bt.len; - unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); - if (ret == 0) { - ret = (a_len > b_len) - (a_len < b_len); + if (tctx->cnts.curobjs != 0) { + return false; } - return ret; -} - -rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, - prof_gctx_comp) - -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; - uint64_t a_uid = a->thr_uid; - uint64_t b_uid = b->thr_uid; - - ret = ((a_uid > b_uid) - (a_uid < b_uid)); - if (ret == 0) { - uint64_t a_discrim = a->thr_discrim; - uint64_t b_discrim = b->thr_discrim; - - ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + if (tctx->prepared) { + return false; } - return ret; + return true; } -rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, - prof_tdata_comp) - -/******************************************************************************/ - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; @@ -286,45 +207,6 @@ bt_init(prof_bt_t *bt, void **vec) { bt->len = 0; } -static void -prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - if (tdata != NULL) { - assert(!tdata->enq); - tdata->enq = true; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); -} - -static void -prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - if (tdata != NULL) { - bool idump, gdump; - - assert(tdata->enq); - tdata->enq = false; - idump = tdata->enq_idump; - tdata->enq_idump = false; - gdump = tdata->enq_gdump; - tdata->enq_gdump = false; - - if (idump) { - prof_idump(tsd_tsdn(tsd)); - } - if (gdump) { - prof_gdump(tsd_tsdn(tsd)); - } - } -} - #ifdef JEMALLOC_PROF_LIBUNWIND void prof_backtrace(prof_bt_t *bt) { @@ -547,324 +429,18 @@ prof_backtrace(prof_bt_t *bt) { } #endif -static malloc_mutex_t * +malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } -static malloc_mutex_t * +malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; } -static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { - /* - * Create a single allocation that has space for vec of length bt->len. - */ - size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); - if (gctx == NULL) { - return NULL; - } - gctx->lock = prof_gctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - gctx->nlimbo = 1; - tctx_tree_new(&gctx->tctxs); - /* Duplicate bt. */ - memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); - gctx->bt.vec = gctx->vec; - gctx->bt.len = bt->len; - return gctx; -} - -static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { - cassert(config_prof); - - /* - * Check that gctx is still unused by any thread cache before destroying - * it. prof_lookup() increments gctx->nlimbo in order to avoid a race - * condition with this function, as does prof_tctx_destroy() in order to - * avoid a race between the main body of prof_tctx_destroy() and entry - * into this function. 
- */ - prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - assert(gctx->nlimbo != 0); - if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { - /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { - not_reached(); - } - prof_leave(tsd, tdata_self); - /* Destroy gctx. */ - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); - } else { - /* - * Compensate for increment in prof_tctx_destroy() or - * prof_lookup(). - */ - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_leave(tsd, tdata_self); - } -} - -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - if (opt_prof_accum) { - return false; - } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; -} - -static bool -prof_gctx_should_destroy(prof_gctx_t *gctx) { - if (opt_prof_accum) { - return false; - } - if (!tctx_tree_empty(&gctx->tctxs)) { - return false; - } - if (gctx->nlimbo != 0) { - return false; - } - return true; -} - -static void -prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - assert(tctx->cnts.curobjs == 0); - assert(tctx->cnts.curbytes == 0); - assert(!opt_prof_accum); - assert(tctx->cnts.accumobjs == 0); - assert(tctx->cnts.accumbytes == 0); - - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - switch (tctx->state) { - case prof_tctx_state_nominal: - tctx_tree_remove(&gctx->tctxs, tctx); - destroy_tctx = true; - if (prof_gctx_should_destroy(gctx)) { - /* - * Increment gctx->nlimbo in order to keep another - * thread from winning the race to destroy gctx while - * this one has gctx->lock dropped. Without this, it - * would be possible for another thread to: - * - * 1) Sample an allocation associated with gctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_gctx_try_destroy(gctx). - * - * The result would be that gctx no longer exists by the - * time this thread accesses it in - * prof_gctx_try_destroy(). - */ - gctx->nlimbo++; - destroy_gctx = true; - } else { - destroy_gctx = false; - } - break; - case prof_tctx_state_dumping: - /* - * A dumping thread needs tctx to remain valid until dumping - * has finished. Change state such that the dumping thread will - * complete destruction during a late dump iteration phase. 
- */ - tctx->state = prof_tctx_state_purgatory; - destroy_tctx = false; - destroy_gctx = false; - break; - default: - not_reached(); - destroy_tctx = false; - destroy_gctx = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); - } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - - if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); - } -} - -static bool -prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, - void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { - union { - prof_gctx_t *p; - void *v; - } gctx, tgctx; - union { - prof_bt_t *p; - void *v; - } btkey; - bool new_gctx; - - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - /* bt has never been seen before. Insert it. */ - prof_leave(tsd, tdata); - tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); - if (tgctx.v == NULL) { - return true; - } - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - gctx.p = tgctx.p; - btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { - /* OOM. */ - prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, - true, true); - return true; - } - new_gctx = true; - } else { - new_gctx = false; - } - } else { - tgctx.v = NULL; - new_gctx = false; - } - - if (!new_gctx) { - /* - * Increment nlimbo, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); - gctx.p->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); - new_gctx = false; - - if (tgctx.v != NULL) { - /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); - } - } - prof_leave(tsd, tdata); - - *p_btkey = btkey.v; - *p_gctx = gctx.p; - *p_new_gctx = new_gctx; - return false; -} - -prof_tctx_t * -prof_lookup(tsd_t *tsd, prof_bt_t *bt) { - union { - prof_tctx_t *p; - void *v; - } ret; - prof_tdata_t *tdata; - bool not_found; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); - if (!not_found) { /* Note double negative! */ - ret.p->prepared = true; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (not_found) { - void *btkey; - prof_gctx_t *gctx; - bool new_gctx, error; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { - return NULL; - } - - /* Link a prof_tctx_t into gctx for this thread. 
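/*
 * Illustrative sketch, not jemalloc code: prof_tctx_destroy() above, together
 * with prof_tctx_merge_tdata() and prof_tctx_finish_iter() later in this
 * patch, moves a tctx through a small state machine so that a concurrent dump
 * never has it freed out from under it. The hypothetical enum below restates
 * those states; the real ones are the prof_tctx_state_* values in the code.
 */
#include <stdbool.h>

typedef enum {
	SKETCH_INITIALIZING,	/* being linked by prof_lookup(); dumps skip it */
	SKETCH_NOMINAL,		/* live and not involved in any dump */
	SKETCH_DUMPING,		/* counters snapshotted by an in-flight dump */
	SKETCH_PURGATORY	/* freed by its owner mid-dump; the dumping
				 * thread reclaims it in its finish pass */
} sketch_tctx_state_t;

/*
 * Mirrors the destroy decision above: only a nominal tctx is torn down
 * immediately; a dumping one is parked in purgatory for the dumper.
 */
static bool
sketch_destroy_now(sketch_tctx_state_t state) {
	return state == SKETCH_NOMINAL;
}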
*/ - ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); - if (ret.p == NULL) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - return NULL; - } - ret.p->tdata = tdata; - ret.p->thr_uid = tdata->thr_uid; - ret.p->thr_discrim = tdata->thr_discrim; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - ret.p->gctx = gctx; - ret.p->tctx_uid = tdata->tctx_uid_next++; - ret.p->prepared = true; - ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (error) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); - return NULL; - } - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - ret.p->state = prof_tctx_state_nominal; - tctx_tree_insert(&gctx->tctxs, ret.p); - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - - return ret.p; -} - /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with @@ -921,885 +497,85 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { #endif } -#ifdef JEMALLOC_JET -static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - size_t *tdata_count = (size_t *)arg; - - (*tdata_count)++; - - return NULL; +int +prof_getpid(void) { +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif } -size_t -prof_tdata_count(void) { - size_t tdata_count = 0; - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); - malloc_mutex_unlock(tsdn, &tdatas_mtx); +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +static void +prof_dump_filename(char *filename, char v, uint64_t vseq) { + cassert(config_prof); - return tdata_count; + if (vseq != VSEQ_INVALID) { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c%"FMTu64".heap", + opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c.heap", + opt_prof_prefix, prof_getpid(), prof_dump_seq, v); + } + prof_dump_seq++; } -size_t -prof_bt_count(void) { - size_t bt_count; +static void +prof_fdump(void) { tsd_t *tsd; - prof_tdata_t *tdata; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return 0; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); - bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - return bt_count; -} -#endif + char filename[DUMP_FILENAME_BUFSIZE]; -static int -prof_dump_open_impl(bool propagate_err, const char *filename) { - int fd; + cassert(config_prof); + assert(opt_prof_final); + assert(opt_prof_prefix[0] != '\0'); - fd = creat(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) { - abort(); - } + if (!prof_booted) { + return; } + tsd = tsd_fetch(); + assert(tsd_reentrancy_level_get(tsd) == 0); - return fd; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(tsd_tsdn(tsd), 
&prof_dump_seq_mtx); + prof_dump(tsd, false, filename, opt_prof_leak); } -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; - -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; +bool +prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { cassert(config_prof); - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { + return true; } - prof_dump_buf_end = 0; - - return ret; -} - -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return ret; + prof_accum->accumbytes = 0; +#else + atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); +#endif + return false; } -static bool -prof_dump_write(bool propagate_err, const char *s) { - size_t i, slen, n; +void +prof_idump(tsdn_t *tsdn) { + tsd_t *tsd; + prof_tdata_t *tdata; cassert(config_prof); - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); - - return false; -} - -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) 
{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; -} - -static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - malloc_mutex_lock(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - return; - case prof_tctx_state_nominal: - tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - - memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - - tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - tdata->cnt_summed.accumobjs += - tctx->dump_cnts.accumobjs; - tdata->cnt_summed.accumbytes += - tctx->dump_cnts.accumbytes; - } - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - not_reached(); - } -} - -static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); - - gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - } -} - -static prof_tctx_t * -prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. */ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); - break; - default: - not_reached(); - } - - return NULL; -} - -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - case prof_tctx_state_nominal: - /* Not captured by this dump. */ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } - break; - default: - not_reached(); - } - return NULL; -} - -static prof_tctx_t * -prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - prof_tctx_t *ret; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. 
*/ - break; - case prof_tctx_state_dumping: - tctx->state = prof_tctx_state_nominal; - break; - case prof_tctx_state_purgatory: - ret = tctx; - goto label_return; - default: - not_reached(); - } - - ret = NULL; -label_return: - return ret; -} - -static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - cassert(config_prof); - - malloc_mutex_lock(tsdn, gctx->lock); - - /* - * Increment nlimbo so that gctx won't go away before dump. - * Additionally, link gctx into the dump list so that it is included in - * prof_dump()'s second pass. - */ - gctx->nlimbo++; - gctx_tree_insert(gctxs, gctx); - - memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - - malloc_mutex_unlock(tsdn, gctx->lock); -} - -struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; -}; - -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); - if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; - } - malloc_mutex_unlock(arg->tsdn, gctx->lock); - - return NULL; -} - -static void -prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; - - /* - * Standard tree iteration won't work here, because as soon as we - * decrement gctx->nlimbo and unlock gctx, another thread can - * concurrently destroy it, which will corrupt the tree. Therefore, - * tear down the tree one node at a time during iteration. - */ - while ((gctx = gctx_tree_first(gctxs)) != NULL) { - gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - { - prof_tctx_t *next; - - next = NULL; - do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, - (void *)tsd_tsdn(tsd)); - if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, NULL, true, true); - } else { - next = NULL; - } - } while (next != NULL); - } - gctx->nlimbo--; - if (prof_gctx_should_destroy(gctx)) { - gctx->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - } -} - -struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; -}; - -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, tdata->lock); - if (!tdata->expired) { - size_t tabind; - union { - prof_tctx_t *p; - void *v; - } tctx; - - tdata->dumping = true; - memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { - prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); - } - - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; - if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; - } - } else { - tdata->dumping = false; - } - malloc_mutex_unlock(arg->tsdn, tdata->lock); - - return NULL; -} - -static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, 
prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - - if (!tdata->dumping) { - return NULL; - } - - if (prof_dump_printf(propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } - return NULL; -} - -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } - - malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; -} -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; - -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - - cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); - - /* Avoid dumping such gctx's that have no useful data. */ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { - assert(gctx->cnt_summed.curobjs == 0); - assert(gctx->cnt_summed.curbytes == 0); - assert(gctx->cnt_summed.accumobjs == 0); - assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; - } - - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } - } - - if (prof_dump_printf(propagate_err, - "\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } - - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return ret; -} - -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps(const char *format, ...) 
{ - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - -int -prof_getpid(void) { -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} - -static bool -prof_dump_maps(bool propagate_err) { - bool ret; - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - { - int pid = prof_getpid(); - - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } - } -#endif - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; - } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } - - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; -} - -/* - * See prof_sample_threshold_update() comment for why the body of this function - * is conditionally compiled. - */ -static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { -#ifdef JEMALLOC_PROF - /* - * Scaling is equivalent AdjustSamples() in jeprof, but the result may - * differ slightly from what jeprof reports, because here we scale the - * summary values, whereas jeprof scales each context individually and - * reports the sums of the scaled values. - */ - if (cnt_all->curbytes != 0) { - double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); - - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); - malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); - } -#endif -} - -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; -} - -static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { - size_t tabind; - union { - prof_gctx_t *p; - void *v; - } gctx; - - prof_enter(tsd, tdata); - - /* - * Put gctx's in limbo and clear their counters in preparation for - * summing. - */ - gctx_tree_new(gctxs); - for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); - } - - /* - * Iterate over tdatas, and for the non-expired ones snapshot their tctx - * stats and merge them into the associated gctx's. - */ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); - - prof_leave(tsd, tdata); -} - -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, - prof_gctx_tree_t *gctxs) { - /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } - - /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - - /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - - /* Dump /proc//maps if possible. 
*/ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } - - if (prof_dump_close(propagate_err)) { - return true; - } - - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; -} - -static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - - prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - - if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); - } - return false; -} - -#ifdef JEMALLOC_JET -void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; - } -} -#endif - -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) -static void -prof_dump_filename(char *filename, char v, uint64_t vseq) { - cassert(config_prof); - - if (vseq != VSEQ_INVALID) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); - } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v); - } - prof_dump_seq++; -} - -static void -prof_fdump(void) { - tsd_t *tsd; - char filename[DUMP_FILENAME_BUFSIZE]; - - cassert(config_prof); - assert(opt_prof_final); - assert(opt_prof_prefix[0] != '\0'); - - if (!prof_booted) { - return; - } - tsd = tsd_fetch(); - assert(tsd_reentrancy_level_get(tsd) == 0); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); - prof_dump(tsd, false, filename, opt_prof_leak); -} - -bool 
-prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { - cassert(config_prof); - -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", - WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { - return true; - } - prof_accum->accumbytes = 0; -#else - atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); -#endif - return false; -} - -void -prof_idump(tsdn_t *tsdn) { - tsd_t *tsd; - prof_tdata_t *tdata; - - cassert(config_prof); - - if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { - return; - } - tsd = tsdn_tsd(tsdn); - if (tsd_reentrancy_level_get(tsd) > 0) { - return; + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { + return; + } + tsd = tsdn_tsd(tsdn); + if (tsd_reentrancy_level_get(tsd) > 0) { + return; } tdata = prof_tdata_get(tsd, false); @@ -1878,28 +654,6 @@ prof_gdump(tsdn_t *tsdn) { } } -void -prof_bt_hash(const void *key, size_t r_hash[2]) { - prof_bt_t *bt = (prof_bt_t *)key; - - cassert(config_prof); - - hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); -} - -bool -prof_bt_keycomp(const void *k1, const void *k2) { - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - cassert(config_prof); - - if (bt1->len != bt2->len) { - return false; - } - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -1912,124 +666,33 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { return thr_uid; } -static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { - prof_tdata_t *tdata; - - cassert(config_prof); - - /* Initialize an empty cache for this thread. */ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (tdata == NULL) { - return NULL; - } - - tdata->lock = prof_tdata_mutex_choose(thr_uid); - tdata->thr_uid = thr_uid; - tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; - tdata->attached = true; - tdata->expired = false; - tdata->tctx_uid_next = 0; - - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); - return NULL; - } - - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); - - tdata->enq = false; - tdata->enq_idump = false; - tdata->enq_gdump = false; - - tdata->dumping = false; - tdata->active = active; - - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - return tdata; -} - prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } -static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached && !even_if_attached) { - return false; - } - if (ckh_count(&tdata->bt2tctx) != 0) { - return false; - } - return true; -} - -static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, tdata->lock); - - return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); -} - -static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), 
&tdatas_mtx); - - tdata_tree_remove(&tdatas, tdata); - - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); + if (thread_name == NULL) { + return NULL; } - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); -} - -static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); -} -static void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); - /* - * Only detach if !destroy_tdata, because detaching would allow - * another thread to win the race to destroy tdata. - */ - if (!destroy_tdata) { - tdata->attached = false; - } - tsd_prof_tdata_set(tsd, NULL); - } else { - destroy_tdata = false; + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, true); + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; } + memcpy(ret, thread_name, size); + return ret; } prof_tdata_t * @@ -2045,58 +708,6 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { active); } -static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsdn, tdata->lock); - if (!tdata->expired) { - tdata->expired = true; - destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsdn, tdata, false); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsdn, tdata->lock); - - return destroy_tdata; -} - -static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); -} - -void -prof_reset(tsd_t *tsd, size_t lg_sample) { - prof_tdata_t *next; - - assert(lg_sample < (sizeof(uint64_t) << 3)); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - - lg_prof_sample = lg_sample; - - next = NULL; - do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); - if (to_destroy != NULL) { - next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); - } else { - next = NULL; - } - } while (next != NULL); - - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); -} - void prof_tdata_cleanup(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2143,29 +754,6 @@ prof_thread_name_get(tsd_t *tsd) { return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; @@ -2330,16 +918,15 @@ prof_boot2(tsd_t *tsd) { return true; } - if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { + if (prof_data_init(tsd)) { return true; } + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } - tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; diff --git a/src/prof_data.c b/src/prof_data.c new file mode 100644 index 0000000..bab8e5c --- /dev/null +++ b/src/prof_data.c @@ -0,0 +1,1441 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" + +/* + * This file defines and manages the core profiling data structures. + * + * Conceptually, profiling data can be imagined as a table with three columns: + * thread, stack trace, and current allocation size. (When prof_accum is on, + * there's one additional column which is the cumulative allocation size.) + * + * Implementation wise, each thread maintains a hash recording the stack trace + * to allocation size correspondences, which are basically the individual rows + * in the table. In addition, two global "indices" are built to make data + * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically + * the "grouped by stack trace" and "grouped by thread" views of the same table, + * respectively. Note that the allocation size is only aggregated to the two + * indices at dumping time, so as to optimize for performance. + */ + +/******************************************************************************/ + +/* + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2gctx; + +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; + +/******************************************************************************/ +/* Red-black trees. 
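/*
 * Illustrative sketch, not jemalloc code: the header comment above describes
 * one logical table plus two derived views of it. Restated with a
 * hypothetical row type, each sampled call site a thread tracks looks like:
 */
#include <stddef.h>
#include <stdint.h>

struct sketch_row {
	uint64_t   thr_uid;	/* owning thread */
	void     **bt_vec;	/* backtrace: vector of return addresses */
	size_t     bt_len;
	uint64_t   cur_bytes;	/* live bytes attributed to this site */
};
/*
 * bt2gctx groups rows by (bt_vec, bt_len) -- the "per stack trace" view --
 * while tdatas groups them by thr_uid -- the "per thread" view. As the
 * comment above notes, counters are only folded into those views at dump
 * time.
 */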
*/ + +static int +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + if (ret == 0) { + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } + } + return ret; +} + +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) + +static int +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) { + ret = (a_len > b_len) - (a_len < b_len); + } + return ret; +} + +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, + prof_gctx_comp) + +static int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return ret; +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ + +bool +prof_data_init(tsd_t *tsd) { + tdata_tree_new(&tdatas); + return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp); +} + +static void +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + if (tdata != NULL) { + assert(!tdata->enq); + tdata->enq = true; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); +} + +static void +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + if (tdata != NULL) { + bool idump, gdump; + + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; + + if (idump) { + prof_idump(tsd_tsdn(tsd)); + } + if (gdump) { + prof_gdump(tsd_tsdn(tsd)); + } + } +} + +static prof_gctx_t * +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { + /* + * Create a single allocation that has space for vec of length bt->len. + */ + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + true); + if (gctx == NULL) { + return NULL; + } + gctx->lock = prof_gctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); + /* Duplicate bt. 
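/*
 * Illustrative sketch, not jemalloc code: prof_enter()/prof_leave() above
 * wrap bt2gctx_mtx and defer any idump/gdump requested while it is held
 * (tdata->enq_idump / enq_gdump), replaying the request after the unlock.
 * The hypothetical enter_critical()/leave_critical() pair below shows the
 * same defer-and-replay shape with a single flag.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static bool pending_idump = false;	/* set under big_lock instead of dumping */

static void
enter_critical(void) {
	pthread_mutex_lock(&big_lock);
	/* Code in here requests a dump by setting pending_idump = true. */
}

static void
leave_critical(void) {
	bool do_idump = pending_idump;
	pending_idump = false;
	pthread_mutex_unlock(&big_lock);
	if (do_idump) {
		/* Perform the deferred dump here, with big_lock released. */
	}
}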
*/ + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return gctx; +} + +static void +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { + cassert(config_prof); + + /* + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry + * into this function. + */ + prof_enter(tsd, tdata_self); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + assert(gctx->nlimbo != 0); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { + not_reached(); + } + prof_leave(tsd, tdata_self); + /* Destroy gctx. */ + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); + } else { + /* + * Compensate for increment in prof_tctx_destroy() or + * prof_lookup(). + */ + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_leave(tsd, tdata_self); + } +} + +static bool +prof_gctx_should_destroy(prof_gctx_t *gctx) { + if (opt_prof_accum) { + return false; + } + if (!tctx_tree_empty(&gctx->tctxs)) { + return false; + } + if (gctx->nlimbo != 0) { + return false; + } + return true; +} + +static bool +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { + union { + prof_gctx_t *p; + void *v; + } gctx, tgctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_gctx; + + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + /* bt has never been seen before. Insert it. */ + prof_leave(tsd, tdata); + tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + if (tgctx.v == NULL) { + return true; + } + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + gctx.p = tgctx.p; + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + /* OOM. */ + prof_leave(tsd, tdata); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, + true, true); + return true; + } + new_gctx = true; + } else { + new_gctx = false; + } + } else { + tgctx.v = NULL; + new_gctx = false; + } + + if (!new_gctx) { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); + new_gctx = false; + + if (tgctx.v != NULL) { + /* Lost race to insert. */ + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, + true); + } + } + prof_leave(tsd, tdata); + + *p_btkey = btkey.v; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; + return false; +} + +prof_tctx_t * +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { + union { + prof_tctx_t *p; + void *v; + } ret; + prof_tdata_t *tdata; + bool not_found; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return NULL; + } + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) { /* Note double negative! */ + ret.p->prepared = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (not_found) { + void *btkey; + prof_gctx_t *gctx; + bool new_gctx, error; + + /* + * This thread's cache lacks bt. 
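/*
 * Illustrative sketch, not jemalloc code: prof_lookup_global() above never
 * allocates while holding bt2gctx_mtx -- it searches, drops the lock to
 * create a candidate gctx, retakes the lock, re-searches, and then either
 * publishes the candidate or frees it because another thread won the race.
 * The hypothetical lookup_or_insert() below applies the same pattern to a
 * trivial linked list protected by one mutex.
 */
#include <pthread.h>
#include <stdlib.h>

struct sketch_node {
	int key;
	struct sketch_node *next;
};
static struct sketch_node *sketch_head;	/* stand-in for bt2gctx */
static pthread_mutex_t sketch_mtx = PTHREAD_MUTEX_INITIALIZER;

static struct sketch_node *
sketch_find_locked(int key) {
	for (struct sketch_node *n = sketch_head; n != NULL; n = n->next) {
		if (n->key == key) {
			return n;
		}
	}
	return NULL;
}

/* Return the node for key, inserting it if needed; never malloc under lock. */
static struct sketch_node *
lookup_or_insert(int key) {
	pthread_mutex_lock(&sketch_mtx);
	struct sketch_node *n = sketch_find_locked(key);
	if (n == NULL) {
		pthread_mutex_unlock(&sketch_mtx);
		struct sketch_node *cand = malloc(sizeof(*cand));
		if (cand == NULL) {
			return NULL;
		}
		cand->key = key;
		pthread_mutex_lock(&sketch_mtx);
		n = sketch_find_locked(key);
		if (n == NULL) {
			cand->next = sketch_head;	/* won the race: publish */
			sketch_head = cand;
			n = cand;
		} else {
			free(cand);			/* lost the race: discard */
		}
	}
	pthread_mutex_unlock(&sketch_mtx);
	return n;
}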
Look for it in the global + * cache. + */ + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, + &new_gctx)) { + return NULL; + } + + /* Link a prof_tctx_t into gctx for this thread. */ + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), + sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_ichoose(tsd, NULL), true); + if (ret.p == NULL) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + return NULL; + } + ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; + ret.p->prepared = true; + ret.p->state = prof_tctx_state_initializing; + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (error) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); + return NULL; + } + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + ret.p->state = prof_tctx_state_nominal; + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + + return ret.p; +} + +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return NULL; +} + +size_t +prof_tdata_count(void) { + size_t tdata_count = 0; + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + + return tdata_count; +} + +size_t +prof_bt_count(void) { + size_t bt_count; + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return 0; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + bt_count = ckh_count(&bt2gctx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + return bt_count; +} +#endif + +static int +prof_dump_open_impl(bool propagate_err, const char *filename) { + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && !propagate_err) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) { + abort(); + } + } + + return fd; +} +prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; + +static bool +prof_dump_flush(bool propagate_err) { + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (!propagate_err) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) { + abort(); + } + } + ret = true; + } + prof_dump_buf_end = 0; + + return ret; +} + +static bool +prof_dump_close(bool propagate_err) { + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return ret; +} + +static bool +prof_dump_write(bool propagate_err, const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + if (prof_dump_flush(propagate_err) && propagate_err) { + return true; + } + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. 
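/*
 * Illustrative sketch, not jemalloc code: prof_dump_open above is exposed as
 * a JET_MUTABLE pointer to prof_dump_open_impl(), apparently so that test
 * builds can substitute their own open routine. The hypothetical open_hook
 * below shows the same "always call through a swappable pointer" seam.
 */
#include <fcntl.h>

typedef int (open_hook_t)(const char *path);

static int
open_hook_impl(const char *path) {
	return creat(path, 0644);	/* the call prof_dump_open_impl() makes */
}

/* Tests may overwrite this pointer; regular code always calls through it. */
static open_hook_t *open_hook = open_hook_impl;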
*/ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + assert(i == slen); + + return false; +} + +JEMALLOC_FORMAT_PRINTF(2, 3) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) { + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return ret; +} + +static void +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + malloc_mutex_lock(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + return; + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); + + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; + } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); + } +} + +static void +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsdn, gctx->lock); + + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); + break; + default: + not_reached(); + } + + return NULL; +} + +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + if (prof_dump_printf(arg->propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) { + return tctx; + } + break; + default: + not_reached(); + } + return NULL; +} + +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + prof_tctx_t *ret; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. 
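/*
 * Illustrative sketch, not jemalloc code: prof_dump_write() above appends to
 * the static prof_dump_buf and flushes to the dump fd whenever the buffer
 * fills, so a single string may be split across several flushes. The
 * hypothetical sketch_write() below reproduces that copy/flush loop with a
 * deliberately tiny buffer.
 */
#include <stddef.h>
#include <string.h>

#define SKETCH_BUFSIZE 64	/* tiny on purpose; PROF_DUMP_BUFSIZE is large */

static char sketch_buf[SKETCH_BUFSIZE];
static size_t sketch_buf_end;

static void
sketch_flush(void) {
	/* A real implementation would write(fd, sketch_buf, sketch_buf_end). */
	sketch_buf_end = 0;
}

static void
sketch_write(const char *s) {
	size_t i = 0;
	size_t slen = strlen(s);

	while (i < slen) {
		if (sketch_buf_end == SKETCH_BUFSIZE) {
			sketch_flush();
		}
		size_t n = slen - i;
		if (n > SKETCH_BUFSIZE - sketch_buf_end) {
			n = SKETCH_BUFSIZE - sketch_buf_end;
		}
		memcpy(&sketch_buf[sketch_buf_end], &s[i], n);
		sketch_buf_end += n;
		i += n;
	}
}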
*/ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx; + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return ret; +} + +static void +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { + cassert(config_prof); + + malloc_mutex_lock(tsdn, gctx->lock); + + /* + * Increment nlimbo so that gctx won't go away before dump. + * Additionally, link gctx into the dump list so that it is included in + * prof_dump()'s second pass. + */ + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); + + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); + + malloc_mutex_unlock(tsdn, gctx->lock); +} + +struct prof_gctx_merge_iter_arg_s { + tsdn_t *tsdn; + size_t leak_ngctx; +}; + +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsdn); + if (gctx->cnt_summed.curobjs != 0) { + arg->leak_ngctx++; + } + malloc_mutex_unlock(arg->tsdn, gctx->lock); + + return NULL; +} + +static void +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + prof_gctx_t *gctx; + + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. + */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + { + prof_tctx_t *next; + + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, NULL, true, true); + } else { + next = NULL; + } + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + } +} + +struct prof_tdata_merge_iter_arg_s { + tsdn_t *tsdn; + prof_cnt_t cnt_all; +}; + +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) { + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, tdata->lock); + if (!tdata->expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) { + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + } + + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + } + } else { + tdata->dumping = false; + } + malloc_mutex_unlock(arg->tsdn, tdata->lock); + + return NULL; +} + +static prof_tdata_t * +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, 
prof_tdata_t *tdata, + void *arg) { + bool propagate_err = *(bool *)arg; + + if (!tdata->dumping) { + return NULL; + } + + if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? " " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) { + return tdata; + } + return NULL; +} + +static bool +prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { + bool ret; + + if (prof_dump_printf(propagate_err, + "heap_v2/%"FMTu64"\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { + return true; + } + + malloc_mutex_lock(tsdn, &tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + return ret; +} +prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; + +static bool +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { + bool ret; + unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; + + cassert(config_prof); + malloc_mutex_assert_owner(tsdn, gctx->lock); + + /* Avoid dumping such gctx's that have no useful data. */ + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); + ret = false; + goto label_return; + } + + if (prof_dump_printf(propagate_err, "@")) { + ret = true; + goto label_return; + } + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"FMTxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } + + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + prof_tctx_dump_iter_arg.tsdn = tsdn; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&prof_tctx_dump_iter_arg) != NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return ret; +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps(const char *format, ...) 
{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + { + int pid = prof_getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps("/proc/%d/maps", pid); + } + } +#endif + if (mfd != -1) { + ssize_t nread; + + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } + } + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); + } while (nread > 0); + } else { + ret = true; + goto label_return; + } + + ret = false; +label_return: + if (mfd != -1) { + close(mfd); + } + return ret; +} + +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. + */ +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, + const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. + */ + if (cnt_all->curbytes != 0) { + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); + malloc_printf( + ": Run jeprof on \"%s\" for leak detail\n", + filename); + } +#endif +} + +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + prof_gctx_t *ret; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { + ret = gctx; + goto label_return; + } + + ret = NULL; +label_return: + malloc_mutex_unlock(arg->tsdn, gctx->lock); + return ret; +} + +static void +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_gctx_tree_t *gctxs) { + size_t tabind; + union { + prof_gctx_t *p; + void *v; + } gctx; + + prof_enter(tsd, tdata); + + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); + } + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. + */ + prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + /* Merge tctx stats into gctx's. */ + prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg->leak_ngctx = 0; + gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, + (void *)prof_gctx_merge_iter_arg); + + prof_leave(tsd, tdata); +} + +static bool +prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + prof_gctx_tree_t *gctxs) { + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { + return true; + } + + /* Dump profile header. */ + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, + &prof_tdata_merge_iter_arg->cnt_all)) { + goto label_write_error; + } + + /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg->propagate_err = propagate_err; + if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, + (void *)prof_gctx_dump_iter_arg) != NULL) { + goto label_write_error; + } + + /* Dump /proc//maps if possible. 
*/ + if (prof_dump_maps(propagate_err)) { + goto label_write_error; + } + + if (prof_dump_close(propagate_err)) { + return true; + } + + return false; +label_write_error: + prof_dump_close(propagate_err); + return true; +} + +bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_gctx_tree_t gctxs; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, + &prof_gctx_dump_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + if (err) { + return true; + } + + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } + return false; +} + +#ifdef JEMALLOC_JET +void +prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes) { + tsd_t *tsd; + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_gctx_tree_t gctxs; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + if (curobjs != NULL) { + *curobjs = 0; + } + if (curbytes != NULL) { + *curbytes = 0; + } + if (accumobjs != NULL) { + *accumobjs = 0; + } + if (accumbytes != NULL) { + *accumbytes = 0; + } + return; + } + + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + if (curobjs != NULL) { + *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + } + if (curbytes != NULL) { + *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + } + if (accumobjs != NULL) { + *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + } + if (accumbytes != NULL) { + *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + } +} +#endif + +void +prof_bt_hash(const void *key, size_t r_hash[2]) { + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); +} + +bool +prof_bt_keycomp(const void *k1, const void *k2) { + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) { + return false; + } + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +prof_tdata_t * +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { + prof_tdata_t *tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. 
*/ + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), + sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (tdata == NULL) { + return NULL; + } + + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; + tdata->thread_name = thread_name; + tdata->attached = true; + tdata->expired = false; + tdata->tctx_uid_next = 0; + + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); + return NULL; + } + + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); + + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; + + tdata->dumping = false; + tdata->active = active; + + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + return tdata; +} + +static bool +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { + if (tdata->attached && !even_if_attached) { + return false; + } + if (ckh_count(&tdata->bt2tctx) != 0) { + return false; + } + return true; +} + +static bool +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsdn, tdata->lock); + + return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); +} + +static void +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + + tdata_tree_remove(&tdatas, tdata); + + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + } + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); +} + +static void +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); +} + +void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + if (tdata->attached) { + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); + /* + * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. + */ + if (!destroy_tdata) { + tdata->attached = false; + } + tsd_prof_tdata_set(tsd, NULL); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, true); + } +} + +static bool +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsdn, tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? false : + prof_tdata_should_destroy(tsdn, tdata, false); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsdn, tdata->lock); + + return destroy_tdata; +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); +} + +void +prof_reset(tsd_t *tsd, size_t lg_sample) { + prof_tdata_t *next; + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + + lg_prof_sample = lg_sample; + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, (void *)tsd); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); + } else { + next = NULL; + } + } while (next != NULL); + + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); +} + +void +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + prof_tdata_t *tdata = tctx->tdata; + prof_gctx_t *gctx = tctx->gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; + + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(!opt_prof_accum); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + switch (tctx->state) { + case prof_tctx_state_nominal: + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). + */ + gctx->nlimbo++; + destroy_gctx = true; + } else { + destroy_gctx = false; + } + break; + case prof_tctx_state_dumping: + /* + * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. + */ + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; + destroy_gctx = false; + break; + default: + not_reached(); + destroy_tctx = false; + destroy_gctx = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } + + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + + if (destroy_tctx) { + idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); + } +} + +/******************************************************************************/ -- cgit v0.12 From 87e2400cbb8b5a49f910b3c72b10297fcc9df839 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 7 Aug 2019 20:12:25 -0700 Subject: Fix tcaches mutex pre- / post-fork handling. 
--- src/tcache.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 50099a9..01c6160 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -778,21 +778,15 @@ tcache_boot(tsdn_t *tsdn) { void tcache_prefork(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_prefork(tsdn, &tcaches_mtx); - } + malloc_mutex_prefork(tsdn, &tcaches_mtx); } void tcache_postfork_parent(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); - } + malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); } void tcache_postfork_child(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_postfork_child(tsdn, &tcaches_mtx); - } + malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } -- cgit v0.12 From 39343555d6ac84a105a2d5e8ba0059115eb20f93 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 2 Aug 2019 09:41:35 -0700 Subject: Report stats for tdatas_mtx and prof_dump_mtx --- doc/jemalloc.xml.in | 24 ++++++++++++++++++++++++ include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 10 ++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7fecda7..5636fb9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2509,6 +2509,30 @@ struct extent_hooks_s { counters. + + + stats.mutexes.prof_thds_data.{counter} + (counter specific type) r- + [] + + Statistics on prof threads data mutex + (global scope; profiling related). {counter} is one + of the counters in mutex profiling + counters. + + + + + stats.mutexes.prof_dump.{counter} + (counter specific type) r- + [] + + Statistics on prof dumping mutex + (global scope; profiling related). {counter} is one + of the counters in mutex profiling + counters. + + stats.mutexes.reset diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 2cb8fb0..6288ede 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -8,7 +8,9 @@ #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(background_thread) \ OP(ctl) \ - OP(prof) + OP(prof) \ + OP(prof_thds_data) \ + OP(prof_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index 48afaa6..a89a709 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1093,8 +1093,12 @@ ctl_refresh(tsdn_t *tsdn) { malloc_mutex_unlock(tsdn, &mtx); if (config_prof && opt_prof) { - READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof, - bt2gctx_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof, bt2gctx_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_thds_data, tdatas_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_dump, prof_dump_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -2972,6 +2976,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } if (config_prof && opt_prof) { MUTEX_PROF_RESET(bt2gctx_mtx); + MUTEX_PROF_RESET(tdatas_mtx); + MUTEX_PROF_RESET(prof_dump_mtx); } -- cgit v0.12 From 7fc6b1b259fd1c38a59341ad555a47790da6f773 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 7 Jun 2019 14:04:59 -0700 Subject: Add buffered writer The buffered writer adopts a signature identical to `write_cb`, so that it can be plugged into anywhere `write_cb` appears. 
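As a point of reference, here is a minimal usage sketch of the interface
introduced below (illustration only, not part of the patch; `my_write_cb`,
`emit_buffered`, `out_fd` and `BUF_LEN` are made-up names, and the usual
jemalloc internal headers are assumed to be in scope):

    #include <string.h>

    #define BUF_LEN 4096

    /* An ordinary write_cb that writes straight to a file descriptor. */
    static void
    my_write_cb(void *cbopaque, const char *s) {
        int fd = *(int *)cbopaque;
        malloc_write_fd(fd, s, strlen(s));
    }

    static void
    emit_buffered(int out_fd) {
        static char buf[BUF_LEN];
        /* buf_size must be one less than the capacity of the buf array. */
        buf_writer_arg_t arg = {my_write_cb, &out_fd, buf, BUF_LEN - 1, 0};

        /*
         * buffered_write_cb has the write_cb signature, so it can be passed
         * wherever a write_cb is expected; my_write_cb is then only invoked
         * when the buffer fills up or is flushed.
         */
        buffered_write_cb(&arg, "first chunk of output");
        buffered_write_cb(&arg, "second chunk of output\n");

        /* Flush whatever remains before arg goes out of scope. */
        buf_writer_flush(&arg);
    }

Note that cbopaque (here &out_fd) is only forwarded to the underlying
write_cb at flush time, which is fine for the typical "option-like"
cbopaque usage described in the header comment.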
--- Makefile.in | 1 + include/jemalloc/internal/malloc_io.h | 25 ++++++++++++++ src/malloc_io.c | 30 ++++++++++++++++ test/unit/buf_writer.c | 64 +++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 test/unit/buf_writer.c diff --git a/Makefile.in b/Makefile.in index 40daf11..ef75d8a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -177,6 +177,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ $(srcroot)test/unit/binshard.c \ + $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 1d1a414..f5d16a5 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -99,4 +99,29 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } +/******************************************************************************/ + +/* + * The rest is buffered writing utility. + * + * The only difference when using the buffered writer is that cbopaque is + * passed to write_cb only when the buffer is flushed. It would make a + * difference if cbopaque points to something that's changing for each write_cb + * call, or something that affects write_cb in a way dependent on the content + * of the output string. However, the most typical usage case in practice is + * that cbopaque points to some "option like" content for the write_cb, so it + * doesn't matter. + */ + +typedef struct { + void (*write_cb)(void *, const char *); + void *cbopaque; + char *buf; + size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_end; +} buf_writer_arg_t; + +void buf_writer_flush(buf_writer_arg_t *arg); +void buffered_write_cb(void *buf_writer_arg, const char *s); + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/malloc_io.c b/src/malloc_io.c index d7cb0f5..2fae757 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -664,6 +664,36 @@ malloc_printf(const char *format, ...) { va_end(ap); } +void +buf_writer_flush(buf_writer_arg_t *arg) { + assert(arg->buf_end <= arg->buf_size); + arg->buf[arg->buf_end] = '\0'; + if (arg->write_cb == NULL) { + arg->write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + arg->write_cb(arg->cbopaque, arg->buf); + arg->buf_end = 0; +} + +void +buffered_write_cb(void *buf_writer_arg, const char *s) { + buf_writer_arg_t *arg = (buf_writer_arg_t *)buf_writer_arg; + size_t i, slen, n, s_remain, buf_remain; + assert(arg->buf_end <= arg->buf_size); + for (i = 0, slen = strlen(s); i < slen; i += n) { + if (arg->buf_end == arg->buf_size) { + buf_writer_flush(arg); + } + s_remain = slen - i; + buf_remain = arg->buf_size - arg->buf_end; + n = s_remain < buf_remain ? s_remain : buf_remain; + memcpy(arg->buf + arg->buf_end, s + i, n); + arg->buf_end += n; + } + assert(i == slen); +} + /* * Restore normal assertion macros, in order to make it possible to compile all * C files as a single concatenation. 
diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c new file mode 100644 index 0000000..4d8ae99 --- /dev/null +++ b/test/unit/buf_writer.c @@ -0,0 +1,64 @@ +#include "test/jemalloc_test.h" + +#define TEST_BUF_SIZE 16 +#define UNIT_MAX (TEST_BUF_SIZE * 3) + +static size_t test_write_len; +static char test_buf[TEST_BUF_SIZE]; +static uint64_t arg_store; + +static void test_write_cb(void *cbopaque, const char *s) { + size_t prev_test_write_len = test_write_len; + test_write_len += strlen(s); /* only increase the length */ + arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ + assert_zu_le(prev_test_write_len, test_write_len, + "Test write overflowed"); +} + +TEST_BEGIN(test_buf_write) { + char s[UNIT_MAX + 1]; + size_t n_unit, remain, i; + ssize_t unit; + uint64_t arg = 4; /* Starting value of random argument. */ + buf_writer_arg_t test_buf_arg = + {test_write_cb, &arg, test_buf, TEST_BUF_SIZE - 1, 0}; + + memset(s, 'a', UNIT_MAX); + arg_store = arg; + for (unit = UNIT_MAX; unit >= 0; --unit) { + /* unit keeps decreasing, so strlen(s) is always unit. */ + s[unit] = '\0'; + for (n_unit = 1; n_unit <= 3; ++n_unit) { + test_write_len = 0; + remain = 0; + for (i = 1; i <= n_unit; ++i) { + arg = prng_lg_range_u64(&arg, 64); + buffered_write_cb(&test_buf_arg, s); + remain += unit; + if (remain > test_buf_arg.buf_size) { + /* Flushes should have happened. */ + assert_u64_eq(arg_store, arg, "Call " + "back argument didn't get through"); + remain %= test_buf_arg.buf_size; + if (remain == 0) { + /* Last flush should be lazy. */ + remain += test_buf_arg.buf_size; + } + } + assert_zu_eq(test_write_len + remain, i * unit, + "Incorrect length after writing %zu strings" + " of length %zu", i, unit); + } + buf_writer_flush(&test_buf_arg); + assert_zu_eq(test_write_len, n_unit * unit, + "Incorrect length after flushing at the end of" + " writing %zu strings of length %zu", n_unit, unit); + } + } +} +TEST_END + +int +main(void) { + return test(test_buf_write); +} -- cgit v0.12 From 8c8466fa6e413b08ce83c6f5ac96d2b1454e3afe Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 30 Jul 2019 11:07:24 -0700 Subject: Add compact json option for emitter JSON format is largely meant for machine-machine communication, so adding the option to the emitter. According to local testing, the savings in terms of bytes outputted is around 50% for stats printing and around 25% for prof log printing. --- include/jemalloc/internal/emitter.h | 66 +++++++++----- test/unit/emitter.c | 170 +++++++++++++++++++++++++----------- 2 files changed, 160 insertions(+), 76 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 542bc79..009bf9a 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -6,6 +6,7 @@ typedef enum emitter_output_e emitter_output_t; enum emitter_output_e { emitter_output_json, + emitter_output_json_compact, emitter_output_table }; @@ -75,6 +76,12 @@ struct emitter_s { bool emitted_key; }; +static inline bool +emitter_outputs_json(emitter_t *emitter) { + return emitter->output == emitter_output_json || + emitter->output == emitter_output_json_compact; +} + /* Internal convenience function. Write to the emitter the given string. 
*/ JEMALLOC_FORMAT_PRINTF(2, 3) static inline void @@ -135,7 +142,7 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, switch (value_type) { case emitter_type_bool: - emitter_printf(emitter, + emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), *(const bool *)value ? "true" : "false"); break; @@ -159,7 +166,7 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * anywhere near the fmt size. */ assert(str_written < BUF_SIZE); - emitter_printf(emitter, + emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: @@ -196,6 +203,7 @@ static inline void emitter_indent(emitter_t *emitter) { int amount = emitter->nesting_depth; const char *indent_str; + assert(emitter->output != emitter_output_json_compact); if (emitter->output == emitter_output_json) { indent_str = "\t"; } else { @@ -209,12 +217,18 @@ emitter_indent(emitter_t *emitter) { static inline void emitter_json_key_prefix(emitter_t *emitter) { + assert(emitter_outputs_json(emitter)); if (emitter->emitted_key) { emitter->emitted_key = false; return; } - emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); - emitter_indent(emitter); + if (emitter->item_at_depth) { + emitter_printf(emitter, ","); + } + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } } /******************************************************************************/ @@ -227,22 +241,23 @@ emitter_init(emitter_t *emitter, emitter_output_t emitter_output, emitter->write_cb = write_cb; emitter->cbopaque = cbopaque; emitter->item_at_depth = false; - emitter->emitted_key = false; + emitter->emitted_key = false; emitter->nesting_depth = 0; } /******************************************************************************/ /* JSON public API. */ -/* +/* * Emits a key (e.g. as appears in an object). The next json entity emitted will * be the corresponding value. */ static inline void emitter_json_key(emitter_t *emitter, const char *json_key) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); - emitter_printf(emitter, "\"%s\": ", json_key); + emitter_printf(emitter, "\"%s\":%s", json_key, + emitter->output == emitter_output_json_compact ? 
"" : " "); emitter->emitted_key = true; } } @@ -250,7 +265,7 @@ emitter_json_key(emitter_t *emitter, const char *json_key) { static inline void emitter_json_value(emitter_t *emitter, emitter_type_t value_type, const void *value) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_print_value(emitter, emitter_justify_none, -1, value_type, value); @@ -268,7 +283,7 @@ emitter_json_kv(emitter_t *emitter, const char *json_key, static inline void emitter_json_array_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_printf(emitter, "["); emitter_nest_inc(emitter); @@ -284,18 +299,20 @@ emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) { static inline void emitter_json_array_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth > 0); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } emitter_printf(emitter, "]"); } } static inline void emitter_json_object_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_printf(emitter, "{"); emitter_nest_inc(emitter); @@ -311,11 +328,13 @@ emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) { static inline void emitter_json_object_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth > 0); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } emitter_printf(emitter, "}"); } } @@ -420,7 +439,7 @@ emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, emitter_type_t value_type, const void *value, const char *table_note_key, emitter_type_t table_note_value_type, const void *table_note_value) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_value(emitter, value_type, value); } else { @@ -440,7 +459,7 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, static inline void emitter_dict_begin(emitter_t *emitter, const char *json_key, const char *table_header) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_object_begin(emitter); } else { @@ -450,7 +469,7 @@ emitter_dict_begin(emitter_t *emitter, const char *json_key, static inline void emitter_dict_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_object_end(emitter); } else { emitter_table_dict_end(emitter); @@ -459,7 +478,7 @@ emitter_dict_end(emitter_t *emitter) { static inline void emitter_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 0); emitter_printf(emitter, "{"); emitter_nest_inc(emitter); @@ -476,10 +495,11 @@ emitter_begin(emitter_t *emitter) { static inline void emitter_end(emitter_t *emitter) { - if 
(emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 1); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n}\n"); + emitter_printf(emitter, "%s", emitter->output == + emitter_output_json_compact ? "}" : "\n}\n"); } } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index b4a693f..712c9e1 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -66,7 +66,9 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { static void assert_emit_output(void (*emit_fn)(emitter_t *), - const char *expected_json_output, const char *expected_table_output) { + const char *expected_json_output, + const char *expected_json_compact_output, + const char *expected_table_output) { emitter_t emitter; char buf[MALLOC_PRINTF_BUFSIZE]; buf_descriptor_t buf_descriptor; @@ -84,6 +86,16 @@ assert_emit_output(void (*emit_fn)(emitter_t *), buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; + emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, + &buf_descriptor); + (*emit_fn)(&emitter); + assert_str_eq(expected_json_compact_output, buf, + "compact json output failure"); + + buf_descriptor.buf = buf; + buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; + buf_descriptor.mid_quote = false; + emitter_init(&emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); @@ -108,6 +120,7 @@ emit_dict(emitter_t *emitter) { emitter_dict_end(emitter); emitter_end(emitter); } + static const char *dict_json = "{\n" "\t\"foo\": {\n" @@ -117,6 +130,15 @@ static const char *dict_json = "\t\t\"jkl\": \"a string\"\n" "\t}\n" "}\n"; +static const char *dict_json_compact = +"{" + "\"foo\":{" + "\"abc\":false," + "\"def\":true," + "\"ghi\":123," + "\"jkl\":\"a string\"" + "}" +"}"; static const char *dict_table = "This is the foo table:\n" " ABC: false\n" @@ -124,11 +146,6 @@ static const char *dict_table = " GHI: 123 (note_key1: \"a string\")\n" " JKL: \"a string\" (note_key2: false)\n"; -TEST_BEGIN(test_dict) { - assert_emit_output(&emit_dict, dict_json, dict_table); -} -TEST_END - static void emit_table_printf(emitter_t *emitter) { emitter_begin(emitter); @@ -141,17 +158,11 @@ emit_table_printf(emitter_t *emitter) { static const char *table_printf_json = "{\n" "}\n"; - +static const char *table_printf_json_compact = "{}"; static const char *table_printf_table = "Table note 1\n" "Table note 2 with format string\n"; -TEST_BEGIN(test_table_printf) { - assert_emit_output(&emit_table_printf, table_printf_json, - table_printf_table); -} -TEST_END - static void emit_nested_dict(emitter_t *emitter) { int val = 123; emitter_begin(emitter); @@ -169,7 +180,7 @@ static void emit_nested_dict(emitter_t *emitter) { emitter_end(emitter); } -static const char *nested_object_json = +static const char *nested_dict_json = "{\n" "\t\"json1\": {\n" "\t\t\"json2\": {\n" @@ -182,8 +193,20 @@ static const char *nested_object_json = "\t\t\"primitive\": 123\n" "\t}\n" "}\n"; - -static const char *nested_object_table = +static const char *nested_dict_json_compact = +"{" + "\"json1\":{" + "\"json2\":{" + "\"primitive\":123" + "}," + "\"json3\":{" + "}" + "}," + "\"json4\":{" + "\"primitive\":123" + "}" +"}"; +static const char *nested_dict_table = "Dict 1\n" " Dict 2\n" " A primitive: 123\n" @@ -191,12 +214,6 @@ static const char *nested_object_table = "Dict 4\n" " Another primitive: 123\n"; -TEST_BEGIN(test_nested_dict) { - assert_emit_output(&emit_nested_dict, nested_object_json, - nested_object_table); -} -TEST_END - static 
void emit_types(emitter_t *emitter) { bool b = false; @@ -235,7 +252,17 @@ static const char *types_json = "\t\"k7\": 789,\n" "\t\"k8\": 10000000000\n" "}\n"; - +static const char *types_json_compact = +"{" + "\"k1\":false," + "\"k2\":-123," + "\"k3\":123," + "\"k4\":-456," + "\"k5\":456," + "\"k6\":\"string\"," + "\"k7\":789," + "\"k8\":10000000000" +"}"; static const char *types_table = "K1: false\n" "K2: -123\n" @@ -246,11 +273,6 @@ static const char *types_table = "K7: 789\n" "K8: 10000000000\n"; -TEST_BEGIN(test_types) { - assert_emit_output(&emit_types, types_json, types_table); -} -TEST_END - static void emit_modal(emitter_t *emitter) { int val = 123; @@ -283,7 +305,18 @@ const char *modal_json = "\t\t\"i6\": 123\n" "\t}\n" "}\n"; - +const char *modal_json_compact = +"{" + "\"j0\":{" + "\"j1\":{" + "\"i1\":123," + "\"i2\":123," + "\"i4\":123" + "}," + "\"i5\":123," + "\"i6\":123" + "}" +"}"; const char *modal_table = "T0\n" " I1: 123\n" @@ -293,13 +326,8 @@ const char *modal_table = " I5: 123\n" " I6: 123\n"; -TEST_BEGIN(test_modal) { - assert_emit_output(&emit_modal, modal_json, modal_table); -} -TEST_END - static void -emit_json_arr(emitter_t *emitter) { +emit_json_array(emitter_t *emitter) { int ival = 123; emitter_begin(emitter); @@ -338,14 +366,24 @@ static const char *json_array_json = "\t\t]\n" "\t}\n" "}\n"; - +static const char *json_array_json_compact = +"{" + "\"dict\":{" + "\"arr\":[" + "{" + "\"foo\":123" + "}," + "123," + "123," + "{" + "\"bar\":123," + "\"baz\":123" + "}" + "]" + "}" +"}"; static const char *json_array_table = ""; -TEST_BEGIN(test_json_arr) { - assert_emit_output(&emit_json_arr, json_array_json, json_array_table); -} -TEST_END - static void emit_json_nested_array(emitter_t *emitter) { int ival = 123; @@ -391,12 +429,27 @@ static const char *json_nested_array_json = "\t\t]\n" "\t]\n" "}\n"; - -TEST_BEGIN(test_json_nested_arr) { - assert_emit_output(&emit_json_nested_array, json_nested_array_json, - json_array_table); -} -TEST_END +static const char *json_nested_array_json_compact = +"{" + "[" + "[" + "123," + "\"foo\"," + "123," + "\"foo\"" + "]," + "[" + "123" + "]," + "[" + "\"foo\"," + "123" + "]," + "[" + "]" + "]" +"}"; +static const char *json_nested_array_table = ""; static void emit_table_row(emitter_t *emitter) { @@ -443,18 +496,29 @@ emit_table_row(emitter_t *emitter) { static const char *table_row_json = "{\n" "}\n"; - +static const char *table_row_json_compact = "{}"; static const char *table_row_table = "ABC title DEF title GHI\n" "123 true 456\n" "789 false 1011\n" "\"a string\" false ghi\n"; -TEST_BEGIN(test_table_row) { - assert_emit_output(&emit_table_row, table_row_json, table_row_table); -} +#define GENERATE_TEST(feature) \ +TEST_BEGIN(test_##feature) { \ + assert_emit_output(emit_##feature, feature##_json, \ + feature##_json_compact, feature##_table); \ +} \ TEST_END +GENERATE_TEST(dict) +GENERATE_TEST(table_printf) +GENERATE_TEST(nested_dict) +GENERATE_TEST(types) +GENERATE_TEST(modal) +GENERATE_TEST(json_array) +GENERATE_TEST(json_nested_array) +GENERATE_TEST(table_row) + int main(void) { return test_no_reentrancy( @@ -463,7 +527,7 @@ main(void) { test_nested_dict, test_types, test_modal, - test_json_arr, - test_json_nested_arr, + test_json_array, + test_json_nested_array, test_table_row); } -- cgit v0.12 From 22746d3c9fddd5486e9ec5c0c6b2e25230db9a8e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Aug 2019 12:46:22 -0700 Subject: Properly dalloc prof nodes with idalloctm. 
The prof_alloc_node is allocated through ialloc as internal. Switch to idalloctm with tcache and is_internal properly set. --- src/prof_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index 56d4e03..ad1cb38 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -480,7 +480,7 @@ prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { emitter_json_object_end(emitter); thr_old_node = thr_node; thr_node = thr_node->next; - idalloc(tsd, thr_old_node); + idalloctm(tsd_tsdn(tsd), thr_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -509,7 +509,7 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { bt_old_node = bt_node; bt_node = bt_node->next; - idalloc(tsd, bt_old_node); + idalloctm(tsd_tsdn(tsd), bt_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -547,7 +547,8 @@ prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { alloc_old_node = alloc_node; alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); + idalloctm(tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, + true); } emitter_json_array_end(emitter); } -- cgit v0.12 From 593484661261c20f75557279931eb2d9ca165185 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Aug 2019 22:15:42 -0700 Subject: Fix large bin index accessed through cache bin descriptor. --- src/arena.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index ba50e41..e956c39 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,13 +199,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - szind_t i = 0; - for (; i < SC_NBINS; i++) { + for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } - for (; i < nhbins; i++) { + for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); -- cgit v0.12 From ad3f7dbfa0f6b510d6e1e0dbaf859506d5ad2a96 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 7 Aug 2019 14:34:34 -0700 Subject: Buffer prof_log_stop Make use of the new buffered writer for the output of `prof_log_stop`. --- src/prof_log.c | 22 +++++++++++++++++----- test/unit/prof_log.c | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index ad1cb38..a659f87 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -160,6 +160,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { return node->index; } } + static size_t prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { assert(prof_logging_state == prof_logging_state_started); @@ -576,7 +577,7 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_object_end(emitter); } - +#define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE bool prof_log_stop(tsdn_t *tsdn) { if (!opt_prof || !prof_booted) { @@ -624,11 +625,18 @@ prof_log_stop(tsdn_t *tsdn) { return true; } - /* Emit to json. 
*/ struct prof_emitter_cb_arg_s arg; arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); + + char *prof_log_stop_buf = (char *)iallocztm(tsdn, + PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); + buf_writer_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, + prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; + + /* Emit to json. */ + emitter_init(&emitter, emitter_output_json, buffered_write_cb, + &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -637,6 +645,9 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); + buf_writer_flush(&prof_log_stop_buf_arg); + idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); + /* Reset global state. */ if (log_tables_initialized) { ckh_delete(tsd, &log_bt_node_set); @@ -661,8 +672,9 @@ prof_log_stop(tsdn_t *tsdn) { return false; } #endif - return close(fd); + return close(fd) || arg.ret == -1; } +#undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { if (opt_prof_log) { diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 92fbd7c..9336ebc 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -61,7 +61,7 @@ static void *f_thread(void *unused) { int i; for (i = 0; i < N_PARAM; i++) { void *p = malloc(100); - memset(p, 100, sizeof(char)); + memset(p, 100, 1); free(p); } -- cgit v0.12 From a219cfcda34e9916c14ff9f9e198b18b41b71fbc Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 12 Jun 2019 17:24:30 -0700 Subject: Clear tcache prof_accumbytes in tcache_flush_cache `tcache->prof_accumbytes` should always be cleared after being transferred to arena; otherwise the allocations would be double counted, leading to excessive prof dumps. --- src/tcache.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 01c6160..3e1b55c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -529,10 +529,12 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } - if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd_tsdn(tsd), tcache->arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); + if (config_prof && tcache->prof_accumbytes > 0) { + if (arena_prof_accum(tsd_tsdn(tsd), tcache->arena, + tcache->prof_accumbytes)) { + prof_idump(tsd_tsdn(tsd)); + } + tcache->prof_accumbytes = 0; } } -- cgit v0.12 From eb70fef8ca86363a036a962852808675ed1598c1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 9 Aug 2019 10:19:51 -0700 Subject: Make compact json format as default Saves 20-50% of the output size. --- src/prof_log.c | 4 ++-- src/stats.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index a659f87..3997656 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -635,8 +635,8 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; /* Emit to json. */ - emitter_init(&emitter, emitter_output_json, buffered_write_cb, - &prof_log_stop_buf_arg); + emitter_init(&emitter, emitter_output_json_compact, + buffered_write_cb, &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); diff --git a/src/stats.c b/src/stats.c index 118e05d..cf75810 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1181,7 +1181,7 @@ stats_general_print(emitter_t *emitter) { * We do enough mallctls in a loop that we actually want to omit them * (not just omit the printing). 
*/ - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "bin"); for (unsigned i = 0; i < nbins; i++) { emitter_json_object_begin(emitter); @@ -1212,7 +1212,7 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "nlextents", "Number of large size classes", emitter_type_unsigned, &nlextents); - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "lextent"); for (unsigned i = 0; i < nlextents; i++) { emitter_json_object_begin(emitter); @@ -1437,8 +1437,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_t emitter; emitter_init(&emitter, - json ? emitter_output_json : emitter_output_table, write_cb, - cbopaque); + json ? emitter_output_json_compact : emitter_output_table, + write_cb, cbopaque); emitter_begin(&emitter); emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); emitter_json_object_kv_begin(&emitter, "jemalloc"); -- cgit v0.12 From 28ed9b9a5198ed866750361fe2c36f83742900ac Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 18 Jul 2019 10:10:45 -0700 Subject: Buffer stats printing Without buffering `malloc_stats_print` would invoke the write back call (which could mean an expensive `malloc_write_fd` call) for every single `printf` (including printing each line break and each leading tab/space for indentation). --- src/jemalloc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ed13718..dec987c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3704,6 +3704,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return ret; } +#define STATS_PRINT_BUFSIZE 65536 JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { @@ -3713,10 +3714,24 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); - stats_print(write_cb, cbopaque, opts); + + if (config_debug) { + stats_print(write_cb, cbopaque, opts); + } else { + char *stats_print_buf = (char *)iallocztm(tsdn, + STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); + buf_writer_arg_t stats_print_buf_arg = {write_cb, cbopaque, + stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; + stats_print(buffered_write_cb, &stats_print_buf_arg, opts); + buf_writer_flush(&stats_print_buf_arg); + idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); + } + check_entry_exit_locking(tsdn); LOG("core.malloc_stats_print.exit", ""); } +#undef STATS_PRINT_BUFSIZE JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { -- cgit v0.12 From 9c5c2a2c86d473a63806e534c39fb74a882fa558 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Aug 2019 11:08:39 -0700 Subject: Unify the signature of tcache_flush small and large. 
--- include/jemalloc/internal/tcache_externs.h | 4 ++-- include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/tcache.c | 11 ++++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index d63eafd..266f246 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -31,8 +31,8 @@ void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5eca20e..46b9af4 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -202,8 +202,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); bin_info = &tcache_bin_info[binind]; if (unlikely(bin->ncached == bin_info->ncached_max)) { - tcache_bin_flush_large(tsd, bin, binind, - (bin_info->ncached_max >> 1), tcache); + tcache_bin_flush_large(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); } assert(bin->ncached < bin_info->ncached_max); bin->ncached++; diff --git a/src/tcache.c b/src/tcache.c index 3e1b55c..c5fe67a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -65,8 +65,9 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]++; } } else { - tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_large(tsd, tcache, tbin, binind, + tbin->ncached - tbin->low_water + (tbin->low_water + >> 2)); } } else if (tbin->low_water < 0) { /* @@ -227,8 +228,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache) { +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + unsigned rem) { bool merged_stats = false; assert(binind < nhbins); @@ -522,7 +523,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } for (unsigned i = SC_NBINS; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tcache, tbin, i, 0); if (config_stats) { assert(tbin->tstats.nrequests == 0); -- cgit v0.12 From e2c7584361718ccb12c932d2236a16ec3a31f1a7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Aug 2019 11:11:01 -0700 Subject: Simplify / refactor tcache_dalloc_large. 
--- include/jemalloc/internal/tcache_inlines.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 46b9af4..4815774 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -201,13 +201,12 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); bin_info = &tcache_bin_info[binind]; - if (unlikely(bin->ncached == bin_info->ncached_max)) { + if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { tcache_bin_flush_large(tsd, tcache, bin, binind, (bin_info->ncached_max >> 1)); + bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + assert(ret); } - assert(bin->ncached < bin_info->ncached_max); - bin->ncached++; - *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } -- cgit v0.12 From d6b7995c1629768590366a6ff2170d65c4cc6d9b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Aug 2019 22:33:34 -0700 Subject: Update INSTALL.md about the default doc build. --- INSTALL.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index b8f729b..eb55acf 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -9,14 +9,11 @@ If building from unpackaged developer sources, the simplest command sequence that might work is: ./autogen.sh - make dist make make install -Note that documentation is not built by the default target because doing so -would create a dependency on xsltproc in packaged releases, hence the -requirement to either run 'make dist' or avoid installing docs via the various -install_* targets documented below. +Note that documentation is built by the default target only when xsltproc is +available. Build will warn but not stop if the dependency is missing. ## Advanced configuration -- cgit v0.12 From d2dddfb82aac9f2212922eb90324e84790704bfe Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Aug 2019 22:11:21 -0700 Subject: Add hint in the bogus version string. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 261d81c..bca422a 100644 --- a/configure.ac +++ b/configure.ac @@ -583,7 +583,7 @@ if test ! -e "${objroot}VERSION" ; then if test ! -e "${srcroot}VERSION" ; then AC_MSG_RESULT( [Missing VERSION file, and unable to generate it; creating bogus VERSION]) - echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION" + echo "0.0.0-0-g000000missing_version_try_git_fetch_tags" > "${objroot}VERSION" else cp ${srcroot}VERSION ${objroot}VERSION fi -- cgit v0.12 From 7599c82d48ffaa07ce934320f7256b56b200dace Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Aug 2019 22:12:47 -0700 Subject: Redesign the cache bin metadata for fast path. 
Implement the pointer-based metadata for tcache bins -- - 3 pointers are maintained to represent each bin; - 2 of the pointers are compressed on 64-bit; - is_full / is_empty done through pointer comparison; Comparing to the previous counter based design -- - fast-path speed up ~15% in benchmarks - direct pointer comparison and de-reference - no need to access tcache_bin_info in common case --- Makefile.in | 1 + include/jemalloc/internal/cache_bin.h | 160 ++++++++++++++---- .../internal/jemalloc_internal_inlines_a.h | 4 +- include/jemalloc/internal/tcache_externs.h | 2 - include/jemalloc/internal/tcache_inlines.h | 20 +-- src/arena.c | 20 ++- src/jemalloc.c | 5 +- src/tcache.c | 186 ++++++++++++++------- test/unit/cache_bin.c | 64 +++++++ 9 files changed, 340 insertions(+), 122 deletions(-) create mode 100644 test/unit/cache_bin.c diff --git a/Makefile.in b/Makefile.in index ef75d8a..7584f59 100644 --- a/Makefile.in +++ b/Makefile.in @@ -178,6 +178,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bit_util.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/buf_writer.c \ + $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index d14556a..67180cf 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -13,7 +13,6 @@ * of the tcache at all. */ - /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark @@ -39,29 +38,67 @@ struct cache_bin_info_s { /* Upper limit on ncached. */ cache_bin_sz_t ncached_max; }; +extern cache_bin_info_t *tcache_bin_info; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { - /* Min # cached since last GC. */ - cache_bin_sz_t low_water; - /* # of cached objects. */ - cache_bin_sz_t ncached; /* - * ncached and stats are both modified frequently. Let's keep them + * The cache bin stack is represented using 3 pointers: cur_ptr, + * low_water and full, optimized for the fast path efficiency. + * + * low addr ==> high addr + * |----|----|----|item1|item2|.....................|itemN| + * full cur empty + * (ncached == N; full + ncached_max == empty) + * + * Data directly stored: + * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr. + * 2) full points to the top of the stack (i.e. ncached == ncached_max), + * which is compared against on free_fastpath to check "is_full". + * 3) low_water indicates a low water mark of ncached. + * Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1]. + * + * The empty position (ncached == 0) is derived via full + ncached_max + * and not accessed in the common case (guarded behind low_water). + * + * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by + * omitting the high 32 bits. Overflow of the half pointers is avoided + * when allocating / initializing the stack space. As a result, + * cur_ptr.lowbits can be safely used for pointer comparisons. + */ + union { + void **ptr; + struct { + /* highbits never accessed directly. */ +#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN)) + uint32_t __highbits; +#endif + uint32_t lowbits; +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) + uint32_t __highbits; +#endif + }; + } cur_ptr; + /* + * cur_ptr and stats are both modified frequently. 
Let's keep them * close so that they have a higher chance of being on the same * cacheline, thus less write-backs. */ cache_bin_stats_t tstats; /* - * Stack of available objects. + * Points to the first item that hasn't been used since last GC, to + * track the low water mark (min # of cached). It may point to + * empty_position + 1, which indicates the cache has been depleted and + * refilled (low_water == -1). + */ + uint32_t low_water_position; + /* + * Points to the position when the cache is full. * * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. + * stack goes to higher address for newer allocations (i.e. cur_ptr++). */ - void **avail; + uint32_t full_position; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -76,6 +113,67 @@ struct cache_bin_array_descriptor_s { cache_bin_t *bins_large; }; +/* + * None of the cache_bin_*_get / _set functions is used on the fast path, which + * relies on pointer comparisons to determine if the cache is full / empty. + */ +static inline cache_bin_sz_t +cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { + cache_bin_sz_t n = tcache_bin_info[ind].ncached_max - + (bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *); + assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); + + return n; +} + +static inline void ** +cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); + /* Low bits overflow disallowed when allocating the space. */ + assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); + assert(bin->full_position + tcache_bin_info[ind].ncached_max * + sizeof(void *) > bin->full_position); + + /* Can also be computed via (full_position + ncached_max) | highbits. */ + assert(ret == (void **)((uintptr_t)(bin->full_position + + tcache_bin_info[ind].ncached_max * sizeof(void *)) | + (uintptr_t)((uintptr_t)bin->cur_ptr.ptr & + ~(((uint64_t)1 << 32) - 1)))); + + return ret; +} + +/* Returns the position of the bottom item on the stack; for convenience. */ +static inline void ** +cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { + void **bottom = cache_bin_empty_position_get(bin, ind) - 1; + assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL); + + return bottom; +} + +/* Returns the numeric value of low water in [-1, ncached]. 
*/ +static inline cache_bin_sz_t +cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { + cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max - + (bin->low_water_position - bin->full_position) / sizeof(void *); + assert(low_water >= -1 && low_water <= + tcache_bin_info[ind].ncached_max); + assert(low_water <= cache_bin_ncached_get(bin, ind)); + assert(bin->low_water_position >= bin->cur_ptr.lowbits); + + return low_water; +} + +static inline void +cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { + bin->cur_ptr.lowbits = bin->full_position + + (tcache_bin_info[ind].ncached_max - n) * sizeof(void *); + assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + assert(n == 0 || *bin->cur_ptr.ptr != NULL); +} + static inline void cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, cache_bin_t *bins_small, cache_bin_t *bins_large) { @@ -85,19 +183,24 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { - void *ret; - - bin->ncached--; - +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { /* - * Check for both bin->ncached == 0 and ncached < low_water - * in a single branch. + * This may read from the empty position; however the loaded value won't + * be used. It's safe because the stack has one more slot reserved. */ - if (unlikely(bin->ncached <= bin->low_water)) { - bin->low_water = bin->ncached; - if (bin->ncached == -1) { - bin->ncached = 0; + void *ret = *(bin->cur_ptr.ptr++); + /* + * Check for both bin->ncached == 0 and ncached < low_water in a single + * branch. This also avoids accessing tcache_bin_info (which is on a + * separate cacheline / page) in the common case. + */ + if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { + bin->low_water_position = bin->cur_ptr.lowbits; + uint32_t empty_position = bin->full_position + + tcache_bin_info[ind].ncached_max * sizeof(void *); + if (bin->cur_ptr.lowbits > empty_position) { + bin->cur_ptr.ptr--; + assert(bin->cur_ptr.lowbits == empty_position); *success = false; return NULL; } @@ -111,19 +214,18 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { * cacheline). */ *success = true; - ret = *(bin->avail - (bin->ncached + 1)); return ret; } JEMALLOC_ALWAYS_INLINE bool -cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) { - if (unlikely(bin->ncached == bin_info->ncached_max)) { +cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { + if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { return false; } - assert(bin->ncached < bin_info->ncached_max); - bin->ncached++; - *(bin->avail - bin->ncached) = ptr; + + *(--bin->cur_ptr.ptr) = ptr; + assert(bin->cur_ptr.lowbits >= bin->full_position); return true; } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index ddde9b4..fedbd86 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) { if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. 
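
The payoff of the layout is in cache_bin_alloc_easy(): the common case performs one speculative load and one branch, and never touches tcache_bin_info. Below is a minimal runnable model of that control flow as it stands in this commit (raw pointers instead of compressed ones, a toy bin type; illustrative only):

#include <assert.h>
#include <stdio.h>

enum { NCACHED_MAX = 4 };

typedef struct {
    void **cur;        /* next item to hand out (grows toward empty) */
    void **low_water;  /* highest cur seen since the last GC */
    void **empty;      /* cur == empty means nothing is cached */
} toy_bin_t;

static void *
toy_alloc_easy(toy_bin_t *bin, int *success) {
    /* Speculative load; safe only because one extra slot is reserved. */
    void *ret = *(bin->cur++);
    /* A single branch covers both "empty" and "reached the low water". */
    if (bin->cur >= bin->low_water) {
        bin->low_water = bin->cur;  /* may end up one past empty ("-1") */
        if (bin->cur > bin->empty) {
            bin->cur--;             /* revert; the bin was empty */
            *success = 0;
            return NULL;
        }
    }
    *success = 1;
    return ret;
}

int
main(void) {
    int items[NCACHED_MAX];
    void *stack[NCACHED_MAX + 1] = {0};  /* +1 slot of padding */
    for (int i = 0; i < NCACHED_MAX; i++) {
        stack[i] = &items[i];
    }
    toy_bin_t bin = {
        .cur = stack, .low_water = stack, .empty = stack + NCACHED_MAX,
    };
    int ok;
    for (int i = 0; i < NCACHED_MAX; i++) {
        assert(toy_alloc_easy(&bin, &ok) == &items[i] && ok);
    }
    assert(toy_alloc_easy(&bin, &ok) == NULL && !ok);
    printf("served %d items, then reported empty\n", (int)NCACHED_MAX);
    return 0;
}

The extra slot in the backing array mirrors the stack padding added in tcache.c further down: the load may read one slot past the last cached item before the bounds check decides the bin was empty.
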
*/ assert(tsd_tcachep_get(tsd)->arena == NULL || - tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail != - NULL); + tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr + != NULL); return true; } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 266f246..2060bb1 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -4,8 +4,6 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern cache_bin_info_t *tcache_bin_info; - /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more * large-object bins. diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4815774..4f7e02a 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -164,7 +164,6 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { cache_bin_t *bin; - cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); @@ -174,11 +173,10 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } bin = tcache_small_bin_get(tcache, binind); - bin_info = &tcache_bin_info[binind]; - if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { tcache_bin_flush_small(tsd, tcache, bin, binind, - (bin_info->ncached_max >> 1)); - bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + tcache_bin_info[binind].ncached_max >> 1); + bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } @@ -189,7 +187,6 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { cache_bin_t *bin; - cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); @@ -200,11 +197,10 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } bin = tcache_large_bin_get(tcache, binind); - bin_info = &tcache_bin_info[binind]; - if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { tcache_bin_flush_large(tsd, tcache, bin, binind, - (bin_info->ncached_max >> 1)); - bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + tcache_bin_info[binind].ncached_max >> 1); + bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } diff --git a/src/arena.c b/src/arena.c index e956c39..23d0294 100644 --- a/src/arena.c +++ b/src/arena.c @@ -202,12 +202,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - 
tbin->ncached * sz_index2size(i)); + cache_bin_ncached_get(tbin, i) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * sz_index2size(i)); + cache_bin_ncached_get(tbin, i + SC_NBINS) * + sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1381,7 +1382,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill, cnt; - assert(tbin->ncached == 0); + assert(cache_bin_ncached_get(tbin, binind) == 0); if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); @@ -1390,6 +1391,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, unsigned binshard; bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + void **empty_position = cache_bin_empty_position_get(tbin, binind); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; @@ -1400,7 +1402,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tofill : extent_nfree_get(slab); arena_slab_reg_alloc_batch( slab, &bin_infos[binind], cnt, - tbin->avail - nfill + i); + empty_position - nfill + i); } else { cnt = 1; void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, @@ -1412,18 +1414,18 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, */ if (ptr == NULL) { if (i > 0) { - memmove(tbin->avail - i, - tbin->avail - nfill, + memmove(empty_position - i, + empty_position - nfill, i * sizeof(void *)); } break; } /* Insert such that low regions get used first. */ - *(tbin->avail - nfill + i) = ptr; + *(empty_position - nfill + i) = ptr; } if (config_fill && unlikely(opt_junk_alloc)) { for (unsigned j = 0; j < cnt; j++) { - void* ptr = *(tbin->avail - nfill + i + j); + void* ptr = *(empty_position - nfill + i + j); arena_alloc_junk_small(ptr, &bin_infos[binind], true); } @@ -1437,7 +1439,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tbin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); - tbin->ncached = i; + cache_bin_ncached_set(tbin, binind, i); arena_decay_tick(tsdn, arena); } diff --git a/src/jemalloc.c b/src/jemalloc.c index dec987c..75a4027 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2368,7 +2368,7 @@ je_malloc(size_t size) { cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void* ret = cache_bin_alloc_easy(bin, &tcache_success); + void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); if (tcache_success) { if (config_stats) { @@ -2846,8 +2846,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); - cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind]; - if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) { + if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } diff --git a/src/tcache.c b/src/tcache.c index c5fe67a..d282e1f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -14,7 +14,16 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; cache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. 
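
On refill, arena_tcache_fill_small() writes the new objects into the slots directly below the empty position and then drops cur_ptr onto the lowest of them via cache_bin_ncached_set(). A stripped-down model of that geometry (toy arrays, no jemalloc types; illustrative only):

#include <assert.h>
#include <stdio.h>

enum { NCACHED_MAX = 8 };

int
main(void) {
    int regs[NCACHED_MAX];               /* stand-ins for slab regions */
    void *stack[NCACHED_MAX];
    void **empty = stack + NCACHED_MAX;

    /* Refill nfill items so the batch ends exactly at the empty position. */
    unsigned nfill = NCACHED_MAX / 2;
    for (unsigned i = 0; i < nfill; i++) {
        *(empty - nfill + i) = &regs[i];
    }
    /* cache_bin_ncached_set(tbin, binind, nfill) then amounts to: */
    void **cur = empty - nfill;

    /* The lowest-addressed slot is handed out first on later allocs. */
    assert(*cur == &regs[0]);
    assert((unsigned)(empty - cur) == nfill);
    printf("ncached after refill: %u\n", (unsigned)(empty - cur));
    return 0;
}
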
*/ +/* + * For the total bin stack region (per tcache), reserve 2 more slots so that 1) + * the empty position can be safely read on the fast path before checking + * "is_empty"; and 2) the low_water == -1 case can go beyond the empty position + * by 1 step safely (i.e. no overflow). + */ +static const unsigned total_stack_padding = sizeof(void *) * 2; + +/* Total stack size required (per tcache). Include the padding above. */ +static uint32_t total_stack_bytes; unsigned nhbins; size_t tcache_maxclass; @@ -47,14 +56,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } else { tbin = tcache_large_bin_get(tcache, binind); } - if (tbin->low_water > 0) { + + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + if (low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ if (binind < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, tbin, binind, - tbin->ncached - tbin->low_water + (tbin->low_water - >> 2)); + ncached - low_water + (low_water >> 2)); /* * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. @@ -66,10 +77,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } } else { tcache_bin_flush_large(tsd, tcache, tbin, binind, - tbin->ncached - tbin->low_water + (tbin->low_water - >> 2)); + ncached - low_water + (low_water >> 2)); } - } else if (tbin->low_water < 0) { + } else if (low_water < 0) { + assert(low_water == -1); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. @@ -78,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]--; } } - tbin->low_water = tbin->ncached; + tbin->low_water_position = tbin->cur_ptr.lowbits; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) { @@ -97,7 +108,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = cache_bin_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success, binind); return ret; } @@ -117,9 +128,10 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, */ szind_t szind; size_t sz_sum = binind * nflush; + void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { rtree_extent_szind_read(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true, + rtree_ctx, (uintptr_t)*(bottom_item - i), true, &extents[i], &szind); sz_sum -= szind; } @@ -137,13 +149,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, bool merged_stats = false; assert(binind < SC_NBINS); - assert((cache_bin_sz_t)rem <= tbin->ncached); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + assert((cache_bin_sz_t)rem <= ncached); arena_t *arena = tcache->arena; assert(arena != NULL); - unsigned nflush = tbin->ncached - rem; + unsigned nflush = ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind); /* Look up extent once per item. 
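
The GC sizing in tcache_event_hard() reads more easily with numbers: the flush is asked to keep rem = ncached - low_water + (low_water >> 2) items, so roughly three quarters of whatever sat idle below the low water mark gets flushed. A tiny worked example (arbitrary values):

#include <stdio.h>

/* Number of items tcache_event_hard() asks the flush to keep. */
static unsigned
gc_keep(unsigned ncached, unsigned low_water) {
    return ncached - low_water + (low_water >> 2);
}

int
main(void) {
    /* 20 cached, but 8 of them went untouched since the last GC. */
    unsigned ncached = 20, low_water = 8;
    unsigned rem = gc_keep(ncached, low_water);
    unsigned nflush = ncached - rem;
    /* Flushes 6 of the 8 idle items: low_water minus a floored quarter,
     * i.e. the ceiling of 3/4 of them. */
    printf("keep %u, flush %u\n", rem, nflush);
    return 0;
}
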
*/ if (config_opt_safety_checks) { tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, @@ -151,7 +165,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { for (unsigned i = 0 ; i < nflush; i++) { item_extent[i] = iealloc(tsd_tsdn(tsd), - *(tbin->avail - 1 - i)); + *(bottom_item - i)); } } while (nflush > 0) { @@ -181,7 +195,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -196,7 +210,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - *(tbin->avail - 1 - ndeferred) = ptr; + *(bottom_item - ndeferred) = ptr; item_extent[ndeferred] = extent; ndeferred++; } @@ -219,11 +233,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) { - tbin->low_water = tbin->ncached; + cache_bin_ncached_set(tbin, binind, rem); + if (tbin->cur_ptr.lowbits > tbin->low_water_position) { + tbin->low_water_position = tbin->cur_ptr.lowbits; } } @@ -233,17 +247,19 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t bool merged_stats = false; assert(binind < nhbins); - assert((cache_bin_sz_t)rem <= tbin->ncached); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); - unsigned nflush = tbin->ncached - rem; + unsigned nflush = ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. */ for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, @@ -266,7 +282,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); assert(ptr != NULL); extent = item_extent[i]; if (extent_arena_ind_get(extent) == locked_arena_ind) { @@ -295,7 +311,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -308,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t * Stash the object, so that it can be handled * in a future pass. 
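
Both flush routines share the same two-level loop: lock whichever arena (or bin shard) owns the first remaining item, free every object in the batch that belongs to it, and compact the leftovers to the front of the batch for the next pass. The pattern in isolation, with integer tags standing in for extents (illustrative only):

#include <stdio.h>

#define NITEMS 8

int
main(void) {
    /* Each cached item tagged with its owning "arena". */
    int owner[NITEMS] = {0, 1, 0, 2, 1, 0, 2, 2};
    unsigned nflush = NITEMS;

    while (nflush > 0) {
        int locked = owner[0];          /* owner of the first item */
        unsigned ndeferred = 0;
        for (unsigned i = 0; i < nflush; i++) {
            if (owner[i] == locked) {
                /* would dalloc here, under the locked mutex */
            } else {
                /* stash for a future pass */
                owner[ndeferred++] = owner[i];
            }
        }
        printf("pass: freed %u under owner %d, deferred %u\n",
            nflush - ndeferred, locked, ndeferred);
        nflush = ndeferred;
    }
    return 0;
}
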
*/ - *(tbin->avail - 1 - ndeferred) = ptr; + *(bottom_item - ndeferred) = ptr; item_extent[ndeferred] = extent; ndeferred++; } @@ -330,11 +346,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t tbin->tstats.nrequests = 0; } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) { - tbin->low_water = tbin->ncached; + cache_bin_ncached_set(tbin, binind, rem); + if (tbin->cur_ptr.lowbits > tbin->low_water_position) { + tbin->low_water_position = tbin->cur_ptr.lowbits; } } @@ -406,9 +422,43 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { return false; } -/* Initialize auto tcache (embedded in TSD). */ +static bool +tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { + cassert(sizeof(bin->cur_ptr) == sizeof(void *)); + /* + * The full_position points to the lowest available space. Allocations + * will access the slots toward higher addresses (for the benefit of + * adjacent prefetch). + */ + void *full_position = (void *)*stack_cur; + uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max * + sizeof(void *); + + *stack_cur += bin_stack_size; + void *empty_position = (void *)*stack_cur; + + /* Init to the empty position. */ + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)full_position; + assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + assert(cache_bin_ncached_get(bin, ind) == 0); + assert(cache_bin_empty_position_get(bin, ind) == empty_position); + + return false; +} + +/* Sanity check only. */ +static bool +tcache_bin_lowbits_overflowable(void *ptr) { + uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes); + return lowbits < (uint32_t)(uintptr_t)ptr; +} + static void tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { + assert(!tcache_bin_lowbits_overflowable(avail_stack)); + memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->prof_accumbytes = 0; tcache->next_gc_bin = 0; @@ -416,41 +466,43 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); - size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + unsigned i = 0; + uintptr_t stack_cur = (uintptr_t)avail_stack; for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - /* - * avail points past the available space. Allocations will - * access the slots toward higher addresses (for the benefit of - * prefetch). - */ - tcache_small_bin_get(tcache, i)->avail = - (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); + cache_bin_t *bin = tcache_small_bin_get(tcache, i); + tcache_bin_init(bin, i, &stack_cur); } for (; i < nhbins; i++) { - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - tcache_large_bin_get(tcache, i)->avail = - (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); + cache_bin_t *bin = tcache_large_bin_get(tcache, i); + tcache_bin_init(bin, i, &stack_cur); } - assert(stack_offset == stack_nelms * sizeof(void *)); + + /* Sanity check that the whole stack is used. 
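
tcache_bin_init() carves each bin's stack out of one contiguous region by walking a cursor, and the surrounding checks exist so that the carved positions also work as 32-bit lowbits. A stand-alone sketch of the carving step with toy sizes (not the jemalloc code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum { NBINS = 3 };

typedef struct {
    void **full;   /* lowest slot of this bin's stack */
    void **empty;  /* one past the highest slot; cur starts here */
} toy_bin_t;

int
main(void) {
    unsigned ncached_max[NBINS] = {4, 8, 2};
    size_t total = 0;
    for (int i = 0; i < NBINS; i++) {
        total += ncached_max[i] * sizeof(void *);
    }
    /* Two spare slots at the end, mirroring total_stack_padding. */
    total += 2 * sizeof(void *);

    void **region = malloc(total);
    assert(region != NULL);

    toy_bin_t bins[NBINS];
    uintptr_t cursor = (uintptr_t)region;
    for (int i = 0; i < NBINS; i++) {
        bins[i].full = (void **)cursor;
        cursor += ncached_max[i] * sizeof(void *);
        bins[i].empty = (void **)cursor;
    }
    /* Whole region used except the trailing padding. */
    assert(cursor + 2 * sizeof(void *) == (uintptr_t)region + total);
    /* Bins are back to back: one bin's empty is the next bin's full. */
    assert(bins[0].empty == bins[1].full);
    printf("carved %d bins from %zu bytes\n", NBINS, total);
    free(region);
    return 0;
}
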
*/ + size_t stack_offset = stack_cur - (uintptr_t)avail_stack; + assert(stack_offset + total_stack_padding == total_stack_bytes); +} + +static size_t +tcache_bin_stack_alignment (size_t size) { + /* Align pow2 to avoid overflow the cache bin compressed pointers. */ + return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE; } /* Initialize auto tcache (embedded in TSD). */ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(tcache_small_bin_get(tcache, 0)->avail == NULL); - size_t size = stack_nelms * sizeof(void *); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); /* Avoid false cacheline sharing. */ - size = sz_sa2u(size, CACHELINE); - - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); + size_t size = sz_sa2u(total_stack_bytes, CACHELINE); + void *avail_array = ipallocztm(tsd_tsdn(tsd), size, + tcache_bin_stack_alignment(size), true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { return true; } @@ -485,25 +537,24 @@ tsd_tcache_data_init(tsd_t *tsd) { /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { - tcache_t *tcache; - size_t size, stack_offset; - - size = sizeof(tcache_t); + size_t size = sizeof(tcache_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); + size_t stack_offset = size; + size += total_stack_bytes; /* Avoid false cacheline sharing. */ size = sz_sa2u(size, CACHELINE); - tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, + tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, + tcache_bin_stack_alignment(size), true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) { return NULL; } - tcache_init(tsd, tcache, - (void *)((uintptr_t)tcache + (uintptr_t)stack_offset)); + void *avail_array = (void *)((uintptr_t)tcache + + (uintptr_t)stack_offset); + tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); return tcache; @@ -553,9 +604,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. */ - void *avail_array = - (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail - - (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *)); + cache_bin_t *bin = tcache_small_bin_get(tcache, 0); + assert(cache_bin_ncached_get(bin, 0) == 0); + assert(cache_bin_empty_position_get(bin, 0) == + bin->cur_ptr.ptr); + void *avail_array = bin->cur_ptr.ptr - + tcache_bin_info[0].ncached_max; idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { /* Release both the tcache struct and avail array. 
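
For explicit tcaches the struct and all bin stacks share a single allocation: the stacks begin at the pointer-aligned end of tcache_t, which is also how the destroy path knows where the avail region starts. A toy model of that layout (PTR_CEILING approximated by a local macro; the sizes are made up):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Round up to pointer alignment, in the spirit of PTR_CEILING. */
#define PTR_CEIL(s) (((s) + sizeof(void *) - 1) & ~(sizeof(void *) - 1))

typedef struct {
    char payload[52];           /* stand-in for tcache_t's fields */
} toy_tcache_t;

int
main(void) {
    size_t total_stack_bytes = 64 * sizeof(void *);
    size_t stack_offset = PTR_CEIL(sizeof(toy_tcache_t));
    size_t size = stack_offset + total_stack_bytes;

    toy_tcache_t *tcache = malloc(size);
    assert(tcache != NULL);
    void *avail_array = (void *)((uintptr_t)tcache + stack_offset);

    /* The stacks begin right after the pointer-aligned struct. */
    assert((uintptr_t)avail_array % sizeof(void *) == 0);
    assert((uintptr_t)avail_array - (uintptr_t)tcache == stack_offset);
    printf("struct %zu B, stacks at +%zu, total %zu B\n",
        sizeof(toy_tcache_t), stack_offset, size);
    free(tcache);
    return 0;
}
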
*/ @@ -587,16 +641,17 @@ tcache_cleanup(tsd_t *tsd) { if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); if (config_debug) { - assert(tcache_small_bin_get(tcache, 0)->avail == NULL); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr + == NULL); } return; } assert(tsd_tcache_enabled_get(tsd)); - assert(tcache_small_bin_get(tcache, 0)->avail != NULL); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL); tcache_destroy(tsd, tcache, true); if (config_debug) { - tcache_small_bin_get(tcache, 0)->avail = NULL; + tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL; } } @@ -755,8 +810,8 @@ tcache_boot(tsdn_t *tsdn) { if (tcache_bin_info == NULL) { return true; } + unsigned i, stack_nelms; stack_nelms = 0; - unsigned i; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = @@ -775,6 +830,7 @@ tcache_boot(tsdn_t *tsdn) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; stack_nelms += tcache_bin_info[i].ncached_max; } + total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding; return false; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c new file mode 100644 index 0000000..74cf24c --- /dev/null +++ b/test/unit/cache_bin.c @@ -0,0 +1,64 @@ +#include "test/jemalloc_test.h" + +cache_bin_t test_bin; + +TEST_BEGIN(test_cache_bin) { + cache_bin_t *bin = &test_bin; + cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); + /* Page aligned to make sure lowbits not overflowable. */ + void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); + + assert_ptr_not_null(stack, "Unexpected mallocx failure"); + /* Initialize to empty; bin 0. */ + cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max; + void **empty_position = stack + ncached_max; + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)stack; + assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, + "Incorrect empty position"); + /* Not using assert_zu etc on cache_bin_sz_t since it may change. */ + assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + + bool success; + void *ret = cache_bin_alloc_easy(bin, &success, 0); + assert_false(success, "Empty cache bin should not alloc"); + assert_true(cache_bin_low_water_get(bin, 0) == - 1, + "Incorrect low water mark"); + + cache_bin_ncached_set(bin, 0, 0); + assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); + for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { + success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); + assert_true(success && cache_bin_ncached_get(bin, 0) == i, + "Bin dalloc failure"); + } + success = cache_bin_dalloc_easy(bin, (void *)1); + assert_false(success, "Bin should be full"); + assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); + + cache_bin_ncached_set(bin, 0, ncached_max); + assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); + /* Emulate low water after refill. 
*/ + bin->low_water_position = bin->full_position; + for (cache_bin_sz_t i = ncached_max; i > 0; i--) { + ret = cache_bin_alloc_easy(bin, &success, 0); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); + assert_true(success && ncached == i - 1, + "Cache bin alloc failure"); + assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); + assert_true(cache_bin_low_water_get(bin, 0) == ncached, + "Incorrect low water mark"); + } + + ret = cache_bin_alloc_easy(bin, &success, 0); + assert_false(success, "Empty cache bin should not alloc."); + assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, + "Bin should be empty"); +} +TEST_END + +int +main(void) { + return test(test_cache_bin); +} -- cgit v0.12 From 937ca1db9fa1f3c5c54e189049e181b6de5e7133 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Aug 2019 13:08:06 -0700 Subject: Store ncached_max * ptr_size in tcache_bin_info. With the cache bin metadata switched to pointers, ncached_max is usually accessed and timed by sizeof(ptr). Store the results in tcache_bin_info for direct access, and add a helper function for the ncached_max value. --- include/jemalloc/internal/cache_bin.h | 40 +++++++++++++++++------------- include/jemalloc/internal/tcache_inlines.h | 8 +++--- src/arena.c | 2 +- src/tcache.c | 34 ++++++++++++------------- test/unit/cache_bin.c | 2 +- 5 files changed, 45 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 67180cf..775eb3f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,8 +35,8 @@ struct cache_bin_stats_s { */ typedef struct cache_bin_info_s cache_bin_info_t; struct cache_bin_info_s { - /* Upper limit on ncached. */ - cache_bin_sz_t ncached_max; + /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */ + cache_bin_sz_t stack_size; }; extern cache_bin_info_t *tcache_bin_info; @@ -117,11 +117,18 @@ struct cache_bin_array_descriptor_s { * None of the cache_bin_*_get / _set functions is used on the fast path, which * relies on pointer comparisons to determine if the cache is full / empty. */ + +/* Returns ncached_max: Upper limit on ncached. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get(szind_t ind) { + return tcache_bin_info[ind].stack_size / sizeof(void *); +} + static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = tcache_bin_info[ind].ncached_max - - (bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *); - assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); + assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -132,14 +139,13 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); - assert(bin->full_position + tcache_bin_info[ind].ncached_max * - sizeof(void *) > bin->full_position); /* Can also be computed via (full_position + ncached_max) | highbits. 
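
The reconstruction mentioned in the comment above, the 32-bit value of full_position plus the stack size OR'ed with cur_ptr's untouched high bits, only works because the stack region is allocated so that its low 32 bits cannot wrap mid-stack. A stand-alone check of that identity (toy sizes, aligned_alloc standing in for jemalloc's internal, power-of-two-aligned allocation):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void) {
    enum { NSLOTS = 8 };
    size_t stack_size = NSLOTS * sizeof(void *);     /* 64 bytes */

    /* Align to a power of two >= the region size, so the low 32 bits
     * cannot wrap anywhere inside the stack. */
    void **stack = aligned_alloc(128, 128);
    assert(stack != NULL);

    void **cur = stack + 3;                          /* 5 items cached */
    uint32_t full_lowbits = (uint32_t)(uintptr_t)stack;
    uint32_t empty_lowbits = full_lowbits + (uint32_t)stack_size;
    uintptr_t highbits = (uintptr_t)cur &
        ~(uintptr_t)(((uint64_t)1 << 32) - 1);

    /* Empty position recovered from 32-bit state plus cur's high bits. */
    assert((void **)(highbits | empty_lowbits) == stack + NSLOTS);
    printf("empty position reconstructed from lowbits\n");
    free(stack);
    return 0;
}

This is the same constraint that tcache_bin_stack_alignment() enforces on 64-bit by rounding the allocation alignment up to a power of two.
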
*/ - assert(ret == (void **)((uintptr_t)(bin->full_position + - tcache_bin_info[ind].ncached_max * sizeof(void *)) | - (uintptr_t)((uintptr_t)bin->cur_ptr.ptr & - ~(((uint64_t)1 << 32) - 1)))); + uintptr_t lowbits = bin->full_position + + tcache_bin_info[ind].stack_size; + uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & + ~(((uint64_t)1 << 32) - 1); + assert(ret == (void **)(lowbits | highbits)); return ret; } @@ -156,10 +162,10 @@ cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { /* Returns the numeric value of low water in [-1, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max - + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); + cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= -1 && low_water <= - tcache_bin_info[ind].ncached_max); + assert(low_water >= -1 && low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -169,8 +175,8 @@ cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { static inline void cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + - (tcache_bin_info[ind].ncached_max - n) * sizeof(void *); - assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + tcache_bin_info[ind].stack_size - n * sizeof(void *); + assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -197,7 +203,7 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { bin->low_water_position = bin->cur_ptr.lowbits; uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].ncached_max * sizeof(void *); + tcache_bin_info[ind].stack_size; if (bin->cur_ptr.lowbits > empty_position) { bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == empty_position); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4f7e02a..8988ae9 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -174,8 +174,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - tcache_bin_flush_small(tsd, tcache, bin, binind, - tcache_bin_info[binind].ncached_max >> 1); + unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } @@ -198,8 +198,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - tcache_bin_flush_large(tsd, tcache, bin, binind, - tcache_bin_info[binind].ncached_max >> 1); + unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } diff --git a/src/arena.c b/src/arena.c index 23d0294..b383bef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1392,7 +1392,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); void **empty_position = 
cache_bin_empty_position_get(tbin, binind); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > diff --git a/src/tcache.c b/src/tcache.c index d282e1f..2594a02 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -70,8 +70,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - if ((tbin_info->ncached_max >> + if ((cache_bin_ncached_max_get(binind) >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } @@ -431,8 +430,7 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { * adjacent prefetch). */ void *full_position = (void *)*stack_cur; - uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max * - sizeof(void *); + uint32_t bin_stack_size = tcache_bin_info[ind].stack_size; *stack_cur += bin_stack_size; void *empty_position = (void *)*stack_cur; @@ -608,8 +606,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { assert(cache_bin_ncached_get(bin, 0) == 0); assert(cache_bin_empty_position_get(bin, 0) == bin->cur_ptr.ptr); - void *avail_array = bin->cur_ptr.ptr - - tcache_bin_info[0].ncached_max; + void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - + tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { /* Release both the tcache struct and avail array. */ @@ -810,27 +808,27 @@ tcache_boot(tsdn_t *tsdn) { if (tcache_bin_info == NULL) { return true; } - unsigned i, stack_nelms; - stack_nelms = 0; + unsigned i, ncached_max; + total_stack_bytes = 0; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MIN; + ncached_max = TCACHE_NSLOTS_SMALL_MIN; } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (bin_infos[i].nregs << 1); + ncached_max = bin_infos[i].nregs << 1; } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; + ncached_max = TCACHE_NSLOTS_SMALL_MAX; } - stack_nelms += tcache_bin_info[i].ncached_max; + unsigned stack_size = ncached_max * sizeof(void *); + tcache_bin_info[i].stack_size = stack_size; + total_stack_bytes += stack_size; } for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; + unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *); + tcache_bin_info[i].stack_size = stack_size; + total_stack_bytes += stack_size; } - total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding; + total_stack_bytes += total_stack_padding; return false; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 74cf24c..d890041 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_cache_bin) { assert_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. 
*/ - cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max; + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; -- cgit v0.12 From 0043e68d4c54a305d84ead95cae27a730540451b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 20 Aug 2019 18:14:18 -0700 Subject: Track low_water == -1 case explicitly. The -1 value of low_water indicates if the cache has been depleted and refilled. Track the status explicitly in the tcache struct. This allows the fast path to check if (cur_ptr > low_water), instead of >=, which avoids reaching slow path when the last item is allocated. --- include/jemalloc/internal/cache_bin.h | 15 +++++++-------- include/jemalloc/internal/tcache_structs.h | 2 ++ src/arena.c | 2 +- src/tcache.c | 19 ++++++++++++------- test/unit/cache_bin.c | 2 +- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 775eb3f..7ec1ccb 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -56,7 +56,7 @@ struct cache_bin_s { * 2) full points to the top of the stack (i.e. ncached == ncached_max), * which is compared against on free_fastpath to check "is_full". * 3) low_water indicates a low water mark of ncached. - * Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1]. + * Range of low_water is [cur, empty], i.e. values of [ncached, 0]. * * The empty position (ncached == 0) is derived via full + ncached_max * and not accessed in the common case (guarded behind low_water). @@ -87,9 +87,7 @@ struct cache_bin_s { cache_bin_stats_t tstats; /* * Points to the first item that hasn't been used since last GC, to - * track the low water mark (min # of cached). It may point to - * empty_position + 1, which indicates the cache has been depleted and - * refilled (low_water == -1). + * track the low water mark (min # of cached). */ uint32_t low_water_position; /* @@ -165,7 +163,7 @@ cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= -1 && low_water <= ncached_max); + assert(low_water >= 0 && low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -200,16 +198,17 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { * branch. This also avoids accessing tcache_bin_info (which is on a * separate cacheline / page) in the common case. */ - if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { - bin->low_water_position = bin->cur_ptr.lowbits; + if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { uint32_t empty_position = bin->full_position + tcache_bin_info[ind].stack_size; - if (bin->cur_ptr.lowbits > empty_position) { + if (unlikely(bin->cur_ptr.lowbits > empty_position)) { + /* Over-allocated; revert. 
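
The visible effect of tracking refills explicitly is the strict comparison above: with >=, any allocation that merely lands on the recorded low water mark, including handing out the last cached item when the low water is already zero, took the unlikely branch and read the bin info; with > only a genuinely new low water or a truly empty bin leaves the fast path. A tiny before/after model of just that comparison (toy pointers; illustrative only):

#include <stdio.h>

enum { NCACHED_MAX = 4 };

int
main(void) {
    void *stack[NCACHED_MAX];
    void **empty = stack + NCACHED_MAX;

    /* A low water of zero was recorded at the last GC, and one item has
     * been freed back into the bin since then. */
    void **low_water = empty;
    void **cur = empty - 1;

    /* Allocating that last cached item moves cur up to empty. */
    void **cur_after = cur + 1;

    /* Old check (>=): leaves the fast path and touches the bin info. */
    int old_slow = (cur_after >= low_water);
    /* New check (>): stays on the fast path. */
    int new_slow = (cur_after > low_water);

    printf("last-item alloc goes slow? old=%d new=%d\n", old_slow, new_slow);
    return 0;
}
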
*/ bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == empty_position); *success = false; return NULL; } + bin->low_water_position = bin->cur_ptr.lowbits; } /* diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 172ef90..008b1f7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -51,6 +51,8 @@ struct tcache_s { szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[SC_NBINS]; + /* For small bins, whether has been refilled since last GC. */ + bool bin_refilled[SC_NBINS]; /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up diff --git a/src/arena.c b/src/arena.c index b383bef..aa707f4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1383,10 +1383,10 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, unsigned i, nfill, cnt; assert(cache_bin_ncached_get(tbin, binind) == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); } + tcache->bin_refilled[binind] = true; unsigned binshard; bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); diff --git a/src/tcache.c b/src/tcache.c index 2594a02..8f89c55 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -17,8 +17,8 @@ cache_bin_info_t *tcache_bin_info; /* * For the total bin stack region (per tcache), reserve 2 more slots so that 1) * the empty position can be safely read on the fast path before checking - * "is_empty"; and 2) the low_water == -1 case can go beyond the empty position - * by 1 step safely (i.e. no overflow). + * "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step + * safely on the fast path (i.e. no overflow). */ static const unsigned total_stack_padding = sizeof(void *) * 2; @@ -49,12 +49,14 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - cache_bin_t *tbin; + bool is_small; if (binind < SC_NBINS) { tbin = tcache_small_bin_get(tcache, binind); + is_small = true; } else { tbin = tcache_large_bin_get(tcache, binind); + is_small = false; } cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); @@ -63,7 +65,8 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ - if (binind < SC_NBINS) { + if (is_small) { + assert(!tcache->bin_refilled[binind]); tcache_bin_flush_small(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); /* @@ -78,15 +81,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache_bin_flush_large(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); } - } else if (low_water < 0) { - assert(low_water == -1); + } else if (is_small && tcache->bin_refilled[binind]) { + assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. 
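
For small bins the refill amount is ncached_max >> lg_fill_div, and the two GC branches above steer that divisor in opposite directions: a surplus at GC shrinks the next refill, while a bin that was emptied and refilled since the last GC (bin_refilled) grows it back. A toy run of that feedback loop (example numbers only):

#include <stdio.h>

enum { NCACHED_MAX = 64 };

int
main(void) {
    unsigned lg_fill_div = 1;    /* initial setting: refill half the bin */

    /* GC found a surplus (low_water > 0): halve the next refill, but keep
     * the fill count at least 1. */
    if ((NCACHED_MAX >> (lg_fill_div + 1)) >= 1) {
        lg_fill_div++;
    }
    printf("after a surplus GC: refill %u items\n",
        (unsigned)(NCACHED_MAX >> lg_fill_div));

    /* The bin was emptied and refilled before the next GC: double it,
     * keeping lg_fill_div strictly positive. */
    int bin_refilled = 1;
    if (bin_refilled && lg_fill_div > 1) {
        lg_fill_div--;
    }
    printf("after a refill: refill %u items\n",
        (unsigned)(NCACHED_MAX >> lg_fill_div));
    return 0;
}
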
*/ - if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) { + if (tcache->lg_fill_div[binind] > 1) { tcache->lg_fill_div[binind]--; } + tcache->bin_refilled[binind] = false; } tbin->low_water_position = tbin->cur_ptr.lowbits; @@ -472,6 +476,7 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { uintptr_t stack_cur = (uintptr_t)avail_stack; for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; + tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); tcache_bin_init(bin, i, &stack_cur); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index d890041..f469b8d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -23,7 +23,7 @@ TEST_BEGIN(test_cache_bin) { bool success; void *ret = cache_bin_alloc_easy(bin, &success, 0); assert_false(success, "Empty cache bin should not alloc"); - assert_true(cache_bin_low_water_get(bin, 0) == - 1, + assert_true(cache_bin_low_water_get(bin, 0) == 0, "Incorrect low water mark"); cache_bin_ncached_set(bin, 0, 0); -- cgit v0.12 From 9e031c1d1128af879589f5e5c37960edd87238c6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 21 Aug 2019 16:38:44 -0700 Subject: Bug fix for prof_active switch The bug is subtle but critical: if application performs the following three actions in sequence: (a) turn `prof_active` off, (b) make at least one allocation that triggers the malloc slow path via the `if (unlikely(bytes_until_sample < 0))` path, and (c) turn `prof_active` back on, then the application would never get another sample (until a very very long time later). The fix is to properly reset `bytes_until_sample` rather than throwing it all the way to `SSIZE_MAX`. A side minor change is to call `prof_active_get_unlocked()` rather than directly grabbing the `prof_active` variable - it is the very reason why we defined the `prof_active_get_unlocked()` function. --- src/jemalloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 75a4027..dd20688 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2356,13 +2356,15 @@ je_malloc(size_t size) { /* * Avoid a prof_active check on the fastpath. * If prof_active is false, set bytes_until_sample to - * a large value. If prof_active is set to true, + * sampling interval. If prof_active is set to true, * bytes_until_sample will be reset. 
*/ - if (!prof_active) { - tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + if (!prof_active_get_unlocked()) { + tsd_bytes_until_sample_set(tsd, + ((uint64_t)1U << lg_prof_sample)); + } else { + return malloc_default(size); } - return malloc_default(size); } } -- cgit v0.12 From 57b81c078e24cf05025f51dddc7c1b9353999390 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 12 Aug 2019 11:03:36 -0700 Subject: Pull thread_(de)allocated out of config_stats --- include/jemalloc/internal/tsd.h | 4 ++-- src/ctl.c | 31 ++++--------------------------- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 18b2476..e2cc774 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -18,8 +18,8 @@ * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- * s: state * e: tcache_enabled - * m: thread_allocated (config_stats) - * f: thread_deallocated (config_stats) + * m: thread_allocated + * f: thread_deallocated * b: bytes_until_sample (config_prof) * p: prof_tdata (config_prof) * c: rtree_ctx (rtree cache accessed on deallocation) diff --git a/src/ctl.c b/src/ctl.c index a89a709..4bc09a3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1577,25 +1577,6 @@ label_return: \ return ret; \ } -#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - READONLY(); \ - oldval = (m(tsd)); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} - #define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ @@ -1859,14 +1840,10 @@ label_return: return ret; } -CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get, - uint64_t) -CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get, - uint64_t *) -CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get, - uint64_t) -CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, - tsd_thread_deallocatedp_get, uint64_t *) +CTL_RO_NL_GEN(thread_allocated, tsd_thread_allocated_get(tsd), uint64_t) +CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) +CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) +CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) static int thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, -- cgit v0.12 From 49e6fbce78ee2541e41f9d587ae5f31110433ce7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Aug 2019 15:56:47 -0700 Subject: Always adjust thread_(de)allocated --- src/jemalloc.c | 119 +++++++++++++++++++++------------------------------------ 1 file changed, 43 insertions(+), 76 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index dd20688..3961984 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2031,16 +2031,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Filled in by compute_size_with_overflow below. */ size_t size = 0; /* - * For unaligned allocations, we need only ind. For aligned - * allocations, or in case of stats or profiling we need usize. - * - * These are actually dead stores, in that their values are reset before - * any branch on their value is taken. Sometimes though, it's - * convenient to pass them as arguments before this point. 
To avoid - * undefined behavior then, we initialize them with dummy stores. + * The zero initialization for ind is actually dead store, in that its + * value is reset before any branch on its value is taken. Sometimes + * though, it's convenient to pass it as arguments before this point. + * To avoid undefined behavior then, we initialize it with dummy stores. */ szind_t ind = 0; - size_t usize = 0; + /* usize will always be properly initialized. */ + size_t usize; /* Reentrancy is only checked on slow path. */ int8_t reentrancy_level; @@ -2063,12 +2061,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (unlikely(ind >= SC_NSIZES)) { goto label_oom; } - if (config_stats || (config_prof && opt_prof) || sopts->usize) { - usize = sz_index2size(ind); - dopts->usize = usize; - assert(usize > 0 && usize - <= SC_LARGE_MAXCLASS); - } + usize = sz_index2size(ind); + assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); + dopts->usize = usize; } else { if (sopts->bump_empty_aligned_alloc) { if (unlikely(size == 0)) { @@ -2077,8 +2072,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } usize = sz_sa2u(size, dopts->alignment); dopts->usize = usize; - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } } @@ -2107,26 +2101,23 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } + /* + * If dopts->alignment > 0, then ind is still 0, but usize was computed + * in the previous if statement. Down the positive alignment path, + * imalloc_no_sample and imalloc_sample will ignore ind. + */ + /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { - /* - * Note that if we're going down this path, usize must have been - * initialized in the previous if statement. - */ prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize - <= SC_SMALL_MAXCLASS); + alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { - /* - * Note that ind might still be 0 here. This is fine; - * imalloc_sample ignores ind if dopts->alignment > 0. - */ allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); alloc_ctx.slab = false; @@ -2140,12 +2131,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { - /* - * If dopts->alignment > 0, then ind is still 0, but usize was - * computed in the previous if statement. Down the positive - * alignment path, imalloc_no_sample ignores ind and size - * (relying only on usize). 
- */ allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { @@ -2160,10 +2145,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(dopts->alignment == 0 || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); - if (config_stats) { - assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - *tsd_thread_allocatedp_get(tsd) += usize; - } + assert(usize == isalloc(tsd_tsdn(tsd), allocation)); + *tsd_thread_allocatedp_get(tsd) += usize; if (sopts->slow) { UTRACE(0, size, allocation); @@ -2339,11 +2322,12 @@ je_malloc(size_t size) { } szind_t ind = sz_size2index_lookup(size); - size_t usize; - if (config_stats || config_prof) { - usize = sz_index2size(ind); - } - /* Fast path relies on size being a bin. I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS */ + /* usize is always needed to increment thread_allocated. */ + size_t usize = sz_index2size(ind); + /* + * Fast path relies on size being a bin. + * I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS + */ assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); @@ -2373,8 +2357,8 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); if (tcache_success) { + *tsd_thread_allocatedp_get(tsd) += usize; if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; bin->tstats.nrequests++; } if (config_prof) { @@ -2573,16 +2557,11 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize; + size_t usize = sz_index2size(alloc_ctx.szind); if (config_prof && opt_prof) { - usize = sz_index2size(alloc_ctx.szind); prof_free(tsd, ptr, usize, &alloc_ctx); - } else if (config_stats) { - usize = sz_index2size(alloc_ctx.szind); - } - if (config_stats) { - *tsd_thread_deallocatedp_get(tsd) += usize; } + *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, @@ -2638,9 +2617,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, ctx); } - if (config_stats) { - *tsd_thread_deallocatedp_get(tsd) += usize; - } + + *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); @@ -2701,19 +2679,15 @@ je_realloc(void *ptr, size_t arg_size) { assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + usize = sz_s2u(size); if (config_prof && opt_prof) { - usize = sz_s2u(size); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = NULL; } else { ret = irealloc_prof(tsd, ptr, old_usize, usize, &alloc_ctx, &hook_args); } } else { - if (config_stats) { - usize = sz_s2u(size); - } ret = iralloc(tsd, ptr, old_usize, size, 0, false, &hook_args); } @@ -2753,7 +2727,7 @@ je_realloc(void *ptr, size_t arg_size) { } set_errno(ENOMEM); } - if (config_stats && likely(ret != NULL)) { + if (likely(ret != NULL)) { tsd_t *tsd; assert(usize == isalloc(tsdn, ret)); @@ -2852,10 +2826,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - if (config_stats) { - size_t usize = sz_index2size(alloc_ctx.szind); - *tsd_thread_deallocatedp_get(tsd) += usize; - } + size_t usize = sz_index2size(alloc_ctx.szind); + *tsd_thread_deallocatedp_get(tsd) += usize; return true; } @@ 
-3267,8 +3239,7 @@ je_rallocx(void *ptr, size_t size, int flags) { if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -3282,16 +3253,13 @@ je_rallocx(void *ptr, size_t size, int flags) { if (unlikely(p == NULL)) { goto label_oom; } - if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), p); - } + usize = isalloc(tsd_tsdn(tsd), p); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; + UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3439,10 +3407,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { goto label_not_resized; } - if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; + label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; -- cgit v0.12 From adce29c88597c97f46fd02e28ce2689872ac1b0a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 26 Aug 2019 14:41:32 -0700 Subject: Optimize for prof_active off Move the handling of `prof_active` off case completely to slow path, so as to reduce register pressure on malloc fast path. --- src/jemalloc.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3961984..753fcbe 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2109,6 +2109,20 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { + /* + * The fast path modifies bytes_until_sample regardless of + * prof_active. We reset it to be the sample interval, so that + * there won't be excessive routings to the slow path, and that + * when prof_active is turned on later, the counting for + * sampling can immediately resume as normal (though the very + * first sampling interval is not randomized). + */ + if (unlikely(tsd_bytes_until_sample_get(tsd) < 0) && + !prof_active_get_unlocked()) { + tsd_bytes_until_sample_set(tsd, + (ssize_t)(1 << lg_prof_sample)); + } + prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); @@ -2131,6 +2145,16 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { + assert(!opt_prof); + /* + * The fast path modifies bytes_until_sample regardless of + * opt_prof. We reset it to a huge value here, so as to + * minimize the triggering for slow path. + */ + if (config_prof && + unlikely(tsd_bytes_until_sample_get(tsd) < 0)) { + tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + } allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { @@ -2339,16 +2363,10 @@ je_malloc(size_t size) { if (unlikely(bytes_until_sample < 0)) { /* * Avoid a prof_active check on the fastpath. - * If prof_active is false, set bytes_until_sample to - * sampling interval. 
If prof_active is set to true, - * bytes_until_sample will be reset. + * If prof_active is false, bytes_until_sample will be + * reset in slow path. */ - if (!prof_active_get_unlocked()) { - tsd_bytes_until_sample_set(tsd, - ((uint64_t)1U << lg_prof_sample)); - } else { - return malloc_default(size); - } + return malloc_default(size); } } -- cgit v0.12 From 719583f14acc3dc0d24287e18a80b280e46aebb3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Aug 2019 13:18:50 -0700 Subject: Fix large.nflushes in the merged stats. --- src/ctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 4bc09a3..d6f803c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -915,6 +915,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.ndalloc_large); ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); + ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, + &astats->astats.nflushes_large); accum_atomic_zu(&sdstats->astats.abandoned_vm, &astats->astats.abandoned_vm); -- cgit v0.12 From 2abb02ecd74e7e65d3992a542ffb43abe91a8a7f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Aug 2019 14:15:54 -0700 Subject: Fix MSVC 2015 build, as proposed by @christianaguilera-foundry. --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 387f14b..ed0e7b9 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -65,6 +65,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 030d826..bc40883 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files -- cgit v0.12 From 23dc7a7fba904d3893c0f335dfc2d16439b7109c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 30 Aug 2019 11:54:35 -0700 Subject: Fix index type for cache_bin_alloc_easy. --- include/jemalloc/internal/cache_bin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 7ec1ccb..0ce3cab 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -187,7 +187,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. -- cgit v0.12 From 785b84e60382515f1bf1a63457da7a7ab5d0a96b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 30 Aug 2019 11:52:15 -0700 Subject: Make cache_bin_sz_t unsigned. The bin size type was made signed only because the low_water could go -1, which was already removed. 
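Editor's note, for illustration only (not part of the patch; the names and the value below are simplified stand-ins — e.g. TCACHE_NSLOTS_SMALL_MAX is assumed to be around 200): a minimal standalone sketch of the invariant that makes the narrower unsigned type safe, namely that the byte size of any single bin stack must fit in cache_bin_sz_t, which is what the assert added to tcache_boot() in this commit checks.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t cache_bin_sz_t;	/* the new, unsigned bin size type */

int
main(void) {
	/* Stand-in for the largest small-bin slot count. */
	unsigned ncached_max = 200;
	uint64_t stack_size = (uint64_t)ncached_max * sizeof(void *);

	/*
	 * Mirrors the assert added to tcache_boot(): stack_size must be
	 * representable in cache_bin_sz_t, so the ncached / low_water
	 * values derived from it stay in [0, ncached_max] and no signed
	 * -1 sentinel is needed.
	 */
	assert(stack_size < ((uint64_t)1 << (sizeof(cache_bin_sz_t) * 8)));

	printf("stack_size = %u bytes\n", (unsigned)stack_size);
	return 0;
}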
--- include/jemalloc/internal/cache_bin.h | 16 ++++++---------- src/tcache.c | 2 ++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0ce3cab..5396c2d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -13,12 +13,8 @@ * of the tcache at all. */ -/* - * The count of the number of cached allocations in a bin. We make this signed - * so that negative numbers can encode "invalid" states (e.g. a low water mark - * of -1 for a cache that has been depleted). - */ -typedef int32_t cache_bin_sz_t; +/* The size in bytes of each cache bin stack. */ +typedef uint16_t cache_bin_sz_t; typedef struct cache_bin_stats_s cache_bin_stats_t; struct cache_bin_stats_s { @@ -126,7 +122,7 @@ static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); - assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -157,13 +153,13 @@ cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { return bottom; } -/* Returns the numeric value of low water in [-1, ncached]. */ +/* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= 0 && low_water <= ncached_max); + assert(low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -174,7 +170,7 @@ static inline void cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + tcache_bin_info[ind].stack_size - n * sizeof(void *); - assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } diff --git a/src/tcache.c b/src/tcache.c index 8f89c55..5dc2b0a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -825,6 +825,8 @@ tcache_boot(tsdn_t *tsdn) { ncached_max = TCACHE_NSLOTS_SMALL_MAX; } unsigned stack_size = ncached_max * sizeof(void *); + assert(stack_size < ((uint64_t)1 << + (sizeof(cache_bin_sz_t) * 8))); tcache_bin_info[i].stack_size = stack_size; total_stack_bytes += stack_size; } -- cgit v0.12 From 671f120e2669f9574449d4ddad06e561ac8553c3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 3 Sep 2019 17:11:06 -0700 Subject: Fix prof_backtrace() reentrancy level --- src/prof.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/prof.c b/src/prof.c index 79a0ffc..f7311c3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -208,8 +208,8 @@ bt_init(prof_bt_t *bt, void **vec) { } #ifdef JEMALLOC_PROF_LIBUNWIND -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { int nframes; cassert(config_prof); @@ -250,8 +250,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { return _URC_NO_REASON; } -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -259,8 +259,8 @@ prof_backtrace(prof_bt_t *bt) { 
_Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { #define BT_FRAME(i) \ if ((i) < PROF_BT_MAX) { \ void *p; \ @@ -422,13 +422,22 @@ prof_backtrace(prof_bt_t *bt) { #undef BT_FRAME } #else -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { cassert(config_prof); not_reached(); } #endif +void +prof_backtrace(prof_bt_t *bt) { + cassert(config_prof); + tsd_t *tsd = tsd_fetch(); + pre_reentrancy(tsd, NULL); + prof_backtrace_impl(bt); + post_reentrancy(tsd); +} + malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); -- cgit v0.12 From 93d61518005d868c08b597a2d39bdd1775b2a211 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 4 Sep 2019 09:24:34 -0700 Subject: Pass tsd down to prof_backtrace() --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 2 +- src/prof.c | 3 +-- src/prof_log.c | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 8fc45cf..c0471f5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -56,7 +56,7 @@ void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 8ba8a1e..860dfbe 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -155,7 +155,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { bt_init(&bt, tdata->vec); - prof_backtrace(&bt); + prof_backtrace(tsd, &bt); ret = prof_lookup(tsd, &bt); } diff --git a/src/prof.c b/src/prof.c index f7311c3..6a0a9de 100644 --- a/src/prof.c +++ b/src/prof.c @@ -430,9 +430,8 @@ prof_backtrace_impl(prof_bt_t *bt) { #endif void -prof_backtrace(prof_bt_t *bt) { +prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); - tsd_t *tsd = tsd_fetch(); pre_reentrancy(tsd, NULL); prof_backtrace_impl(bt); post_reentrancy(tsd); diff --git a/src/prof_log.c b/src/prof_log.c index 3997656..8274cfc 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -246,7 +246,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_bt_t bt; /* Initialize the backtrace, using the buffer in tdata to store it. */ bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); + prof_backtrace(tsd, &bt); prof_bt_t *cons_bt = &bt; /* We haven't destroyed tctx yet, so gctx should be good to read. */ -- cgit v0.12 From 22bc75ee3e98fb45058fbee45210ed3ab65da6f4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 27 Aug 2019 13:44:41 -0700 Subject: Workaround the stringop-overflow check false positives. 
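Editor's note, illustrative only (dst_small and the helper names below are made up; the actual workaround is the strncpy_cond() helper added to test/src/test.c): this is roughly the shape of code that can trip the check — a copy into a tiny buffer guarded by a condition that is always false at runtime — and how tying the copied length to that condition sidesteps the warning.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static char dst_small[1];	/* tiny destination, as when config_log is off */

static void
copy_guarded(const char *src, bool cond) {
	if (cond) {
		/*
		 * Even though this branch is dead when cond is false, the
		 * compiler may still warn that strcpy can overflow
		 * dst_small.
		 */
		strcpy(dst_small, src);
	}
}

static void
copy_guarded_workaround(const char *src, bool cond) {
	if (cond) {
		/*
		 * The approach strncpy_cond() takes: copy zero bytes in the
		 * (unreachable) cond == false case, so the analyzed call can
		 * no longer overflow the destination.
		 */
		size_t n = cond ? strlen(src) + 1 : 0;
		strncpy(dst_small, src, n);
	}
}

int
main(void) {
	copy_guarded("l0|l1|abc|l2.b|def", false);
	copy_guarded_workaround("l0|l1|abc|l2.b|def", false);
	printf("no copy performed\n");
	return 0;
}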
--- test/include/test/test.h | 2 ++ test/src/test.c | 13 +++++++++++++ test/unit/log.c | 17 +++++++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index fd0e526..07f58a4 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -336,3 +336,5 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); + +void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index f97ce4d..4583e55 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -232,3 +232,16 @@ p_test_fail(const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } + +void +strncpy_cond(void *dst, const char *src, bool cond) { + if (cond) { + /* + * Avoid strcpy and explicitly set length to 0 because the + * `stringop-overflow` check may warn even if the specific test + * is unreachable. + */ + size_t n = cond ? strlen(src) + 1 : 0; + strncpy(dst, src, n); + } +} diff --git a/test/unit/log.c b/test/unit/log.c index a52bd73..10f45bc 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -3,12 +3,17 @@ #include "jemalloc/internal/log.h" static void +update_log_var_names(const char *names) { + strncpy_cond(log_var_names, names, config_log); +} + +static void expect_no_logging(const char *names) { log_var_t log_l1 = LOG_VAR_INIT("l1"); log_var_t log_l2 = LOG_VAR_INIT("l2"); log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); - strcpy(log_var_names, names); + update_log_var_names(names); int count = 0; @@ -50,7 +55,7 @@ TEST_BEGIN(test_log_enabled_direct) { int count; count = 0; - strcpy(log_var_names, "l1"); + update_log_var_names("l1"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1) count++; @@ -59,7 +64,7 @@ TEST_BEGIN(test_log_enabled_direct) { assert_d_eq(count, 10, "Mis-logged!"); count = 0; - strcpy(log_var_names, "l1.a"); + update_log_var_names("l1.a"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1_a) count++; @@ -68,7 +73,7 @@ TEST_BEGIN(test_log_enabled_direct) { assert_d_eq(count, 10, "Mis-logged!"); count = 0; - strcpy(log_var_names, "l1.a|abc|l2|def"); + update_log_var_names("l1.a|abc|l2|def"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1_a) count++; @@ -85,7 +90,7 @@ TEST_END TEST_BEGIN(test_log_enabled_indirect) { test_skip_if(!config_log); atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); - strcpy(log_var_names, "l0|l1|abc|l2.b|def"); + update_log_var_names("l0|l1|abc|l2.b|def"); /* On. 
*/ log_var_t log_l1 = LOG_VAR_INIT("l1"); @@ -135,7 +140,7 @@ TEST_END TEST_BEGIN(test_log_enabled_global) { test_skip_if(!config_log); atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); - strcpy(log_var_names, "abc|.|def"); + update_log_var_names("abc|.|def"); log_var_t log_l1 = LOG_VAR_INIT("l1"); log_var_t log_l2_a_a = LOG_VAR_INIT("l2.a.a"); -- cgit v0.12 From e06658cb24e9f880570c5a44a5ad6b11b620efc5 Mon Sep 17 00:00:00 2001 From: Giridhar Prasath R Date: Thu, 12 Sep 2019 07:35:32 +0530 Subject: check GNU make exists in path Signed-off-by: Giridhar Prasath R --- scripts/gen_run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index a414f81..6875a49 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -14,7 +14,7 @@ nparallel = cpu_count() * 2 uname = uname()[0] -if "BSD" in uname: +if call("command -v gmake", shell=True) == 0: make_cmd = 'gmake' else: make_cmd = 'make' -- cgit v0.12 From 242af439b81044b2604a515ad5d3a8c2d6fbbdfd Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Mon, 9 Sep 2019 20:04:18 -0700 Subject: Rename "prof_dump_seq_mtx" to "prof_dump_filename_mtx". --- include/jemalloc/internal/witness.h | 2 +- src/prof.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index fff9e98..d76b790 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -57,7 +57,7 @@ #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF diff --git a/src/prof.c b/src/prof.c index 6a0a9de..c7c91ef 100644 --- a/src/prof.c +++ b/src/prof.c @@ -99,7 +99,7 @@ malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_seq_mtx; +static malloc_mutex_t prof_dump_filename_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; @@ -549,9 +549,9 @@ prof_fdump(void) { tsd = tsd_fetch(); assert(tsd_reentrancy_level_get(tsd) == 0); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } @@ -597,10 +597,10 @@ prof_idump(tsdn_t *tsdn) { if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, false, filename, false); } } @@ -619,10 +619,10 @@ prof_mdump(tsd_t *tsd, const char *filename) { if (opt_prof_prefix[0] == '\0') { return true; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename_buf, 'm', 
prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); filename = filename_buf; } return prof_dump(tsd, true, filename, false); @@ -654,10 +654,10 @@ prof_gdump(tsdn_t *tsdn) { if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); prof_dump(tsd, false, filename, false); } } @@ -946,8 +946,8 @@ prof_boot2(tsd_t *tsd) { return true; } - if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", - WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", + WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", @@ -1028,7 +1028,7 @@ void prof_prefork1(tsdn_t *tsdn) { if (config_prof && opt_prof) { malloc_mutex_prefork(tsdn, &prof_active_mtx); - malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); @@ -1044,7 +1044,7 @@ prof_postfork_parent(tsdn_t *tsdn) { &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); @@ -1066,7 +1066,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); - malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); -- cgit v0.12 From 4b76c684bb8d7f0b7960bfac84391e9fd51a234e Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Mon, 9 Sep 2019 20:18:41 -0700 Subject: Add "prof.dump_prefix" to override filename prefixes for dumps. --- doc/jemalloc.xml.in | 42 +++++++++--- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/prof_externs.h | 2 + include/jemalloc/internal/prof_types.h | 7 ++ src/ctl.c | 27 ++++++++ src/prof.c | 106 +++++++++++++++++++++++-------- src/prof_log.c | 6 +- test/unit/prof_idump.c | 13 ++++ 8 files changed, 166 insertions(+), 38 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5636fb9..e83bfbf 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1344,7 +1344,10 @@ malloc_conf = "xmalloc:true";]]> set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof. + jeprof. This prefix value can be overriden by + prof.dump_prefix. 
+ @@ -1423,8 +1426,10 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.i<iseq>.heap, where <prefix> is controlled by the opt.prof_prefix - option. By default, interval-triggered profile dumping is disabled + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. By default, interval-triggered profile dumping is disabled (encoded as -1). @@ -1456,8 +1461,10 @@ malloc_conf = "xmalloc:true";]]> usage to a file named according to the pattern <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit() may allocate + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. Note that atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can @@ -2224,8 +2231,25 @@ struct extent_hooks_s { <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the opt.prof_prefix and + prof.dump_prefix + options. + + + + + prof.dump_prefix + (const char *) + -w + [] + + Set the filename prefix for profile dumps. See + opt.prof_prefix - option. + for the default setting. This can be useful to differentiate profile + dumps such as from forked processes. + @@ -2240,8 +2264,10 @@ struct extent_hooks_s { dumped to files named according to the pattern <prefix>.<pid>.<seq>.u<useq>.heap, where <prefix> is controlled by the opt.prof_prefix - option. + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1d1aacc..8ddf7f8 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -103,6 +103,7 @@ bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); void ctl_postfork_child(tsdn_t *tsdn); +void ctl_mtx_assert_held(tsdn_t *tsdn); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index c0471f5..7befad6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -72,10 +72,12 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif int prof_getpid(void); +void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); +bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 1eff995..a50653b 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -53,4 +53,11 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) #define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY +/* Minimize memory bloat for non-prof builds. 
*/ +#ifdef JEMALLOC_PROF +#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +#else +#define PROF_DUMP_FILENAME_LEN 1 +#endif + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index d6f803c..0beef6e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -148,6 +148,7 @@ CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_gdump) +CTL_PROTO(prof_dump_prefix) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) @@ -413,6 +414,7 @@ static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("gdump"), CTL(prof_gdump)}, + {NAME("dump_prefix"), CTL(prof_dump_prefix)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, @@ -1416,6 +1418,11 @@ ctl_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &ctl_mtx); } +void +ctl_mtx_assert_held(tsdn_t *tsdn) { + malloc_mutex_assert_owner(tsdn, &ctl_mtx); +} + /******************************************************************************/ /* *_ctl() functions. */ @@ -2721,6 +2728,26 @@ label_return: } static int +prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const char *prefix = NULL; + + if (!config_prof) { + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITEONLY(); + WRITE(prefix, const char *); + + ret = prof_dump_prefix_set(tsd_tsdn(tsd), prefix) ? EFAULT : 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + +static int prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; diff --git a/src/prof.c b/src/prof.c index c7c91ef..9ea4eda 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" @@ -41,12 +42,7 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; -char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* * Initialized as opt_prof_active, and accessed via @@ -106,6 +102,7 @@ static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; malloc_mutex_t prof_dump_mtx; +static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; @@ -514,26 +511,53 @@ prof_getpid(void) { #endif } +static const char * +prof_dump_prefix_get(tsdn_t* tsdn) { + malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); + + return prof_dump_prefix == NULL ? 
opt_prof_prefix : prof_dump_prefix; +} + +static bool +prof_dump_prefix_is_empty(tsdn_t *tsdn) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return ret; +} + #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void -prof_dump_filename(char *filename, char v, uint64_t vseq) { +prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v); + prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } +void +prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), + ind); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); +} + static void prof_fdump(void) { tsd_t *tsd; @@ -541,16 +565,16 @@ prof_fdump(void) { cassert(config_prof); assert(opt_prof_final); - assert(opt_prof_prefix[0] != '\0'); if (!prof_booted) { return; } tsd = tsd_fetch(); assert(tsd_reentrancy_level_get(tsd) == 0); + assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename, 'f', VSEQ_INVALID); + prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } @@ -571,6 +595,31 @@ prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { return false; } +bool +prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { + cassert(config_prof); + ctl_mtx_assert_held(tsdn); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix == NULL) { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + /* Everything is still guarded by ctl_mtx. 
*/ + char *buffer = base_alloc(tsdn, b0get(), PROF_DUMP_FILENAME_LEN, + QUANTUM); + if (buffer == NULL) { + return true; + } + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + prof_dump_prefix = buffer; + } + assert(prof_dump_prefix != NULL); + + strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + + return false; +} + void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; @@ -595,14 +644,16 @@ prof_idump(tsdn_t *tsdn) { return; } - if (opt_prof_prefix[0] != '\0') { - char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + return; } + char filename[PATH_MAX + 1]; + prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); } bool @@ -616,11 +667,12 @@ prof_mdump(tsd_t *tsd, const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; if (filename == NULL) { /* No filename specified, so automatically generate one. */ - if (opt_prof_prefix[0] == '\0') { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename_buf, 'm', prof_dump_mseq); + prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); filename = filename_buf; @@ -652,14 +704,16 @@ prof_gdump(tsdn_t *tsdn) { return; } - if (opt_prof_prefix[0] != '\0') { - char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsdn)[0] == '\0') { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + return; } + char filename[DUMP_FILENAME_BUFSIZE]; + prof_dump_filename(tsd, filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); } static uint64_t diff --git a/src/prof_log.c b/src/prof_log.c index 8274cfc..af91af7 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -405,7 +405,6 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { } bool ret = false; - size_t buf_size = PATH_MAX + 1; malloc_mutex_lock(tsdn, &log_mtx); @@ -413,11 +412,10 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { ret = true; } else if (filename == NULL) { /* Make default name. 
*/ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); + prof_get_default_filename(tsdn, log_filename, log_seq); log_seq++; prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { + } else if (strlen(filename) >= PROF_DUMP_FILENAME_LEN) { ret = true; } else { strcpy(log_filename, filename); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 1cc6c98..7a9b288 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#define TEST_PREFIX "test_prefix" + static bool did_prof_dump_open; static int @@ -8,6 +10,10 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; + const char filename_prefix[] = TEST_PREFIX "."; + assert_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) + - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); + fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); @@ -18,9 +24,16 @@ TEST_BEGIN(test_idump) { bool active; void *p; + const char *dump_prefix = TEST_PREFIX; + test_skip_if(!config_prof); active = true; + + assert_d_eq(mallctl("prof.dump_prefix", NULL, NULL, + (void *)&dump_prefix, sizeof(dump_prefix)), 0, + "Unexpected mallctl failure while overwriting dump prefix"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); -- cgit v0.12 From b7c7df24ba7c3b76b4985084de6e20356b26547e Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Wed, 14 Aug 2019 16:10:09 -0700 Subject: Add max_per_bg_thd stats for per background thread mutexes. Added a new stats row to aggregate the maximum value of mutex counters for each background threads. Given that the per bg thd mutex is not expected to be contended, this counter is mainly for sanity check / debugging. --- .../jemalloc/internal/background_thread_structs.h | 1 + include/jemalloc/internal/mutex.h | 53 ++++++++++++++++++---- include/jemalloc/internal/mutex_prof.h | 1 + src/background_thread.c | 8 +++- src/ctl.c | 3 ++ 5 files changed, 55 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index c02aa43..249115c 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -48,6 +48,7 @@ struct background_thread_stats_s { size_t num_threads; uint64_t num_runs; nstime_t run_interval; + mutex_prof_data_t max_counter_per_bg_thd; }; typedef struct background_thread_stats_s background_thread_stats_t; diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 7c24f07..f5b1163 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -245,22 +245,25 @@ malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } -/* Copy the prof data from mutex for processing. */ static inline void -malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { - mutex_prof_data_t *source = &mutex->prof_data; - /* Can only read holding the mutex. 
*/ - malloc_mutex_assert_owner(tsdn, mutex); - +malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) { /* * Not *really* allowed (we shouldn't be doing non-atomic loads of * atomic data), but the mutex protection makes this safe, and writing * a member-for-member copy is tedious for this situation. */ - *data = *source; + *dst = *source; /* n_wait_thds is not reported (modified w/o locking). */ - atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); + atomic_store_u32(&dst->n_waiting_thds, 0, ATOMIC_RELAXED); +} + +/* Copy the prof data from mutex for processing. */ +static inline void +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + malloc_mutex_prof_copy(data, &mutex->prof_data); } static inline void @@ -285,4 +288,36 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, data->n_lock_ops += source->n_lock_ops; } +/* Compare the prof data and update to the maximum. */ +static inline void +malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + if (nstime_compare(&source->tot_wait_time, &data->tot_wait_time) > 0) { + nstime_copy(&data->tot_wait_time, &source->tot_wait_time); + } + if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) { + nstime_copy(&data->max_wait_time, &source->max_wait_time); + } + if (source->n_wait_times > data->n_wait_times) { + data->n_wait_times = source->n_wait_times; + } + if (source->n_spin_acquired > data->n_spin_acquired) { + data->n_spin_acquired = source->n_spin_acquired; + } + if (source->max_n_thds > data->max_n_thds) { + data->max_n_thds = source->max_n_thds; + } + if (source->n_owner_switches > data->n_owner_switches) { + data->n_owner_switches = source->n_owner_switches; + } + if (source->n_lock_ops > data->n_lock_ops) { + data->n_lock_ops = source->n_lock_ops; + } + /* n_wait_thds is not reported. 
*/ +} + #endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 6288ede..190402e 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -7,6 +7,7 @@ #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(background_thread) \ + OP(max_per_bg_thd) \ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ diff --git a/src/background_thread.c b/src/background_thread.c index 57b9b25..bea445f 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -794,9 +794,11 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return true; } - stats->num_threads = n_background_threads; - uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); + memset(&stats->max_counter_per_bg_thd, 0, sizeof(mutex_prof_data_t)); + + uint64_t num_runs = 0; + stats->num_threads = n_background_threads; for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; if (malloc_mutex_trylock(tsdn, &info->mtx)) { @@ -809,6 +811,8 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); + malloc_mutex_prof_max_update(tsdn, + &stats->max_counter_per_bg_thd, &info->mtx); } malloc_mutex_unlock(tsdn, &info->mtx); } diff --git a/src/ctl.c b/src/ctl.c index 0beef6e..3ec6ca2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1042,6 +1042,9 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { memset(stats, 0, sizeof(background_thread_stats_t)); nstime_init(&stats->run_interval, 0); } + malloc_mutex_prof_copy( + &ctl_stats->mutex_prof_data[global_prof_mutex_max_per_bg_thd], + &stats->max_counter_per_bg_thd); } static void -- cgit v0.12 From ac5185f73e4dc6b8d9a48b7080d07b11ef231765 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 23 Aug 2019 16:06:50 -0700 Subject: Fix tcache bin stack alignment. Set the proper alignment when allocating space for the tcache bin stack. --- src/tcache.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 5dc2b0a..e17b67a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -492,8 +492,16 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { static size_t tcache_bin_stack_alignment (size_t size) { + /* + * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * smaller sizes; also helps if the larger sizes don't get used at all. + * 2) On 32-bit the pointers won't be compressed; use minimal alignment. + */ + if (LG_SIZEOF_PTR < 3 || size < PAGE) { + return PAGE; + } /* Align pow2 to avoid overflow the cache bin compressed pointers. */ - return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE; + return pow2_ceil_zu(size); } /* Initialize auto tcache (embedded in TSD). */ @@ -501,11 +509,11 @@ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); - /* Avoid false cacheline sharing. 
*/ - size_t size = sz_sa2u(total_stack_bytes, CACHELINE); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, - tcache_bin_stack_alignment(size), true, NULL, true, - arena_get(TSDN_NULL, 0, true)); + size_t alignment = tcache_bin_stack_alignment(total_stack_bytes); + size_t size = sz_sa2u(total_stack_bytes, alignment); + + void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, + NULL, true, arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { return true; } @@ -545,12 +553,11 @@ tcache_create_explicit(tsd_t *tsd) { size = PTR_CEILING(size); size_t stack_offset = size; size += total_stack_bytes; - /* Avoid false cacheline sharing. */ - size = sz_sa2u(size, CACHELINE); + size_t alignment = tcache_bin_stack_alignment(size); + size = sz_sa2u(size, alignment); - tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, - tcache_bin_stack_alignment(size), true, NULL, true, - arena_get(TSDN_NULL, 0, true)); + tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true, + NULL, true, arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) { return NULL; } -- cgit v0.12 From d1be488cd8ceab285b93265ae70a258779ab8310 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2019 10:31:30 -0700 Subject: Add --with-lg-page=16 to CI. --- .travis.yml | 27 +++++++++++++++++++++++++++ scripts/gen_run_tests.py | 1 + scripts/gen_travis.py | 1 + 3 files changed, 29 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2da5da8..777aa3e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -46,6 +48,8 @@ matrix: - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -61,6 +65,8 @@ matrix: - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -84,6 +90,9 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: 
*gcc_multilib + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib - os: linux @@ -104,6 +113,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -118,6 +129,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -130,6 +143,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -140,6 +155,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -148,6 +165,8 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -156,6 +175,14 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks 
--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 6875a49..77c2ce5 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -41,6 +41,7 @@ possible_config_opts = [ '--enable-prof', '--disable-stats', '--enable-opt-safety-checks', + '--with-lg-page=16', ] if bits_64: possible_config_opts.append('--with-lg-vaddr=56') diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index f1478c6..b46bd00 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -47,6 +47,7 @@ configure_flag_unusuals = [ '--disable-stats', '--disable-libdl', '--enable-opt-safety-checks', + '--with-lg-page=16', ] malloc_conf_unusuals = [ -- cgit v0.12 From e7cf84a8dd19af5957f2542934180fe95fdb0885 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:36:42 -0700 Subject: Rearrange slab data and constants The constants logically belong in the sc module. The slab data bitmap isn't really scoped to an arena; move it to its own module. --- include/jemalloc/internal/arena_structs_a.h | 11 ----------- include/jemalloc/internal/arena_types.h | 5 ----- include/jemalloc/internal/bitmap.h | 5 ++--- include/jemalloc/internal/extent_inlines.h | 4 ++-- include/jemalloc/internal/extent_structs.h | 5 +++-- include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/sc.h | 5 +++++ include/jemalloc/internal/slab_data.h | 12 ++++++++++++ src/arena.c | 10 +++++----- 9 files changed, 29 insertions(+), 29 deletions(-) delete mode 100644 include/jemalloc/internal/arena_structs_a.h create mode 100644 include/jemalloc/internal/slab_data.h diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h deleted file mode 100644 index 46aa77c..0000000 --- a/include/jemalloc/internal/arena_structs_a.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H - -#include "jemalloc/internal/bitmap.h" - -struct arena_slab_data_s { - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; - -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 624937e..369dff0 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -3,17 +3,12 @@ #include "jemalloc/internal/sc.h" -/* Maximum number of regions in one slab. 
*/ -#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) - /* Default decay times in milliseconds. */ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 -typedef struct arena_slab_data_s arena_slab_data_t; typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index c3f9cb4..f7152a6 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_H #define JEMALLOC_INTERNAL_BITMAP_H -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" @@ -9,9 +8,9 @@ typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) +#if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. */ -# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. */ # define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 77fa4c4..97dca04 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -168,13 +168,13 @@ extent_past_get(const extent_t *extent) { extent_size_get(extent)); } -static inline arena_slab_data_t * +static inline slab_data_t * extent_slab_data_get(extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; } -static inline const arena_slab_data_t * +static inline const slab_data_t * extent_slab_data_get_const(const extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 767cd89..827bd3b 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -8,6 +8,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/slab_data.h" typedef enum { extent_state_active = 0, @@ -120,7 +121,7 @@ struct extent_s { #define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) #define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) #define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) #define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) @@ -170,7 +171,7 @@ struct extent_s { union { /* Small region slab metadata. */ - arena_slab_data_t e_slab_data; + slab_data_t e_slab_data; /* Profiling data, used for large objects. 
*/ struct { diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 437eaa4..cb76a5e 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -50,7 +50,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 9a099d8..a6341a3 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -264,6 +264,11 @@ /* The largest size class supported. */ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) +/* Maximum number of regions in one slab. */ +#define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +#define SC_SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) + + typedef struct sc_s sc_t; struct sc_s { /* Size class index, or -1 if not a valid size class. */ diff --git a/include/jemalloc/internal/slab_data.h b/include/jemalloc/internal/slab_data.h new file mode 100644 index 0000000..e821863 --- /dev/null +++ b/include/jemalloc/internal/slab_data.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_SLAB_DATA_H +#define JEMALLOC_INTERNAL_SLAB_DATA_H + +#include "jemalloc/internal/bitmap.h" + +typedef struct slab_data_s slab_data_t; +struct slab_data_s { + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_SLAB_DATA_H */ diff --git a/src/arena.c b/src/arena.c index aa707f4..1a3cf7b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -270,7 +270,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, static void * arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; assert(extent_nfree_get(slab) > 0); @@ -286,7 +286,7 @@ arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { static void arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); assert(extent_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); @@ -356,7 +356,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { } static void -arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { +arena_slab_reg_dalloc(extent_t *slab, slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); @@ -1253,7 +1253,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard assert(extent_slab_get(slab)); /* Initialize slab internals. 
*/ - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); extent_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); @@ -1686,7 +1686,7 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, extent_t *slab, void *ptr, bool junked) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { -- cgit v0.12 From 529cfe2abc7d10272c218a2b9047a85a49a9cd2a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:43:54 -0700 Subject: Arena: rename arena_structs_b.h -> arena_structs.h arena_structs_a.h was removed in the previous commit. --- include/jemalloc/internal/arena_structs.h | 232 +++++++++++++++++++++ include/jemalloc/internal/arena_structs_b.h | 232 --------------------- .../jemalloc/internal/jemalloc_internal_includes.h | 2 +- 3 files changed, 233 insertions(+), 233 deletions(-) create mode 100644 include/jemalloc/internal/arena_structs.h delete mode 100644 include/jemalloc/internal/arena_structs_b.h diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h new file mode 100644 index 0000000..eeab57f --- /dev/null +++ b/include/jemalloc/internal/arena_structs.h @@ -0,0 +1,232 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H + +#include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/ticker.h" + +struct arena_decay_s { + /* Synchronizes all non-atomic fields. */ + malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; + /* + * Approximate time in milliseconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + atomic_zd_t time_ms; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of unpurged pages at beginning of current epoch. 
During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * extents_npages_get(&arena->extents_*) to determine how many dirty + * pages, if any, were generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* + * Pointer to associated stats. These stats are embedded directly in + * the arena's stats due to how stats structures are shared between the + * arena and ctl code. + * + * Synchronization: Same as associated arena's stats field. */ + arena_stats_decay_t *stats; + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; +}; + +struct arena_s { + /* + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena..reset mallctl indiscriminately + * discards all allocations for the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + * + * Synchronization: atomic. + */ + atomic_u_t nthreads[2]; + + /* Next bin shard for binding new threads. Synchronization: atomic. */ + atomic_u_t binshard_next; + + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + + /* Synchronization: internal. */ + arena_stats_t stats; + + /* + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. + * + * Synchronization: tcache_ql_mtx. + */ + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; + + /* Synchronization: internal. */ + prof_accum_t prof_accum; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + * + * Synchronization: atomic. + */ + atomic_zu_t offset_state; + + /* + * Extent serial number generator state. + * + * Synchronization: atomic. + */ + atomic_zu_t extent_sn_next; + + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; + + /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ + atomic_zu_t nactive; + + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + extent_list_t large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + extents_t extents_dirty; + extents_t extents_muzzy; + extents_t extents_retained; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: internal. 
+ */ + arena_decay_t decay_dirty; /* dirty --> muzzy */ + arena_decay_t decay_muzzy; /* muzzy --> retained */ + + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * + * Synchronization: extent_grow_mtx + */ + pszind_t extent_grow_next; + pszind_t retain_grow_limit; + malloc_mutex_t extent_grow_mtx; + + /* + * Available extent structures that were allocated via + * base_alloc_extent(). + * + * Synchronization: extent_avail_mtx. + */ + extent_tree_t extent_avail; + atomic_zu_t extent_avail_cnt; + malloc_mutex_t extent_avail_mtx; + + /* + * bins is used to store heaps of free regions. + * + * Synchronization: internal. + */ + bins_t bins[SC_NBINS]; + + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ + base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; + +/* Used to pass rtree lookup context down the path. */ +struct alloc_ctx_s { + szind_t szind; + bool slab; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h deleted file mode 100644 index eeab57f..0000000 --- a/include/jemalloc/internal/arena_structs_b.h +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H - -#include "jemalloc/internal/arena_stats.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bin.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/ticker.h" - -struct arena_decay_s { - /* Synchronizes all non-atomic fields. */ - malloc_mutex_t mtx; - /* - * True if a thread is currently purging the extents associated with - * this decay structure. - */ - bool purging; - /* - * Approximate time in milliseconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - atomic_zd_t time_ms; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). 
- * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of unpurged pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay_*.nunpurged and - * extents_npages_get(&arena->extents_*) to determine how many dirty - * pages, if any, were generated. - */ - size_t nunpurged; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; - - /* - * Pointer to associated stats. These stats are embedded directly in - * the arena's stats due to how stats structures are shared between the - * arena and ctl code. - * - * Synchronization: Same as associated arena's stats field. */ - arena_stats_decay_t *stats; - /* Peak number of pages in associated extents. Used for debug only. */ - uint64_t ceil_npages; -}; - -struct arena_s { - /* - * Number of threads currently assigned to this arena. Each thread has - * two distinct assignments, one for application-serving allocation, and - * the other for internal metadata allocation. Internal metadata must - * not be allocated from arenas explicitly created via the arenas.create - * mallctl, because the arena..reset mallctl indiscriminately - * discards all allocations for the affected arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - * - * Synchronization: atomic. - */ - atomic_u_t nthreads[2]; - - /* Next bin shard for binding new threads. Synchronization: atomic. */ - atomic_u_t binshard_next; - - /* - * When percpu_arena is enabled, to amortize the cost of reading / - * updating the current CPU id, track the most recent thread accessing - * this arena, and only read CPU if there is a mismatch. - */ - tsdn_t *last_thd; - - /* Synchronization: internal. */ - arena_stats_t stats; - - /* - * Lists of tcaches and cache_bin_array_descriptors for extant threads - * associated with this arena. Stats from these are merged - * incrementally, and at exit if opt_stats_print is enabled. - * - * Synchronization: tcache_ql_mtx. - */ - ql_head(tcache_t) tcache_ql; - ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t tcache_ql_mtx; - - /* Synchronization: internal. */ - prof_accum_t prof_accum; - - /* - * PRNG state for cache index randomization of large allocation base - * pointers. - * - * Synchronization: atomic. - */ - atomic_zu_t offset_state; - - /* - * Extent serial number generator state. - * - * Synchronization: atomic. - */ - atomic_zu_t extent_sn_next; - - /* - * Represents a dss_prec_t, but atomically. - * - * Synchronization: atomic. - */ - atomic_u_t dss_prec; - - /* - * Number of pages in active extents. - * - * Synchronization: atomic. - */ - atomic_zu_t nactive; - - /* - * Extant large allocations. - * - * Synchronization: large_mtx. - */ - extent_list_t large; - /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; - - /* - * Collections of extents that were previously allocated. These are - * used when allocating extents, in an attempt to re-use address space. - * - * Synchronization: internal. 
- */ - extents_t extents_dirty; - extents_t extents_muzzy; - extents_t extents_retained; - - /* - * Decay-based purging state, responsible for scheduling extent state - * transitions. - * - * Synchronization: internal. - */ - arena_decay_t decay_dirty; /* dirty --> muzzy */ - arena_decay_t decay_muzzy; /* muzzy --> retained */ - - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. - * - * Synchronization: extent_grow_mtx - */ - pszind_t extent_grow_next; - pszind_t retain_grow_limit; - malloc_mutex_t extent_grow_mtx; - - /* - * Available extent structures that were allocated via - * base_alloc_extent(). - * - * Synchronization: extent_avail_mtx. - */ - extent_tree_t extent_avail; - atomic_zu_t extent_avail_cnt; - malloc_mutex_t extent_avail_mtx; - - /* - * bins is used to store heaps of free regions. - * - * Synchronization: internal. - */ - bins_t bins[SC_NBINS]; - - /* - * Base allocator, from which arena metadata are allocated. - * - * Synchronization: internal. - */ - base_t *base; - /* Used to determine uptime. Read-only after initialization. */ - nstime_t create_time; -}; - -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; -}; - -/* Used to pass rtree lookup context down the path. */ -struct alloc_ctx_s { - szind_t szind; - bool slab; -}; - -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index cb76a5e..55fcf3e 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -53,7 +53,7 @@ #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" -#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" -- cgit v0.12 From 41187bdfb024dcadcb0c279572dd6440084655f3 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 18:20:22 -0700 Subject: Extents: Break extent-struct/arena interactions Specifically, the extent_arena_[g|s]et functions and the address randomization. These are the only things that tie the extent struct itself to the arena code. 
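For illustration, the lookup that call sites switch to looks roughly like the
sketch below. The helper name is hypothetical and not part of this patch; it
assumes the jemalloc-internal arenas[] array and the extent_arena_ind_get()
accessor that appear in the diff.

/*
 * Illustrative only: the extent now records just an arena index in its
 * e_bits field, and callers resolve that index through the global
 * arenas[] array themselves instead of asking the extent for an
 * arena_t *.
 */
static inline arena_t *
extent_arena_lookup(const extent_t *extent) {
	unsigned arena_ind = extent_arena_ind_get(extent);
	return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED);
}

Pushing the index-to-pointer resolution out to the call sites is what lets the
extent struct drop its dependency on arena_t, and the address randomization
that used the arena's offset_state moves into extent.c with the arena passed
explicitly.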
--- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/extent_inlines.h | 43 ++--------------- src/arena.c | 10 ++-- src/ctl.c | 3 +- src/extent.c | 72 +++++++++++++++++++++-------- src/extent_dss.c | 9 ++-- src/large.c | 30 ++++++++---- test/unit/rtree.c | 12 +++-- test/unit/slab.c | 7 ++- 9 files changed, 106 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index dd92657..917a491 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -178,7 +178,8 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return extent_arena_get(iealloc(tsdn, ptr)); + return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get( + iealloc(tsdn, ptr))], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 97dca04..95be084 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -44,13 +44,6 @@ extent_arena_ind_get(const extent_t *extent) { return arena_ind; } -static inline arena_t * -extent_arena_get(const extent_t *extent) { - unsigned arena_ind = extent_arena_ind_get(extent); - - return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); -} - static inline szind_t extent_szind_get_maybe_invalid(const extent_t *extent) { szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> @@ -192,9 +185,7 @@ extent_prof_alloc_time_get(const extent_t *extent) { } static inline void -extent_arena_set(extent_t *extent, arena_t *arena) { - unsigned arena_ind = (arena != NULL) ? 
arena_ind_get(arena) : ((1U << - MALLOCX_ARENA_BITS) - 1); +extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); } @@ -213,32 +204,6 @@ extent_addr_set(extent_t *extent, void *addr) { } static inline void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_offset_statep_get(tsd), lg_range); - } else { - r = prng_lg_range_zu( - &extent_arena_get(extent)->offset_state, - lg_range, true); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); - } -} - -static inline void extent_size_set(extent_t *extent, size_t size) { assert((size & ~EXTENT_SIZE_MASK) == 0); extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); @@ -364,12 +329,12 @@ extent_is_head_set(extent_t *extent, bool is_head) { } static inline void -extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, +extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - extent_arena_set(extent, arena); + extent_arena_ind_set(extent, arena_ind); extent_addr_set(extent, addr); extent_size_set(extent, size); extent_slab_set(extent, slab); @@ -391,7 +356,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, static inline void extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_set(extent, NULL); + extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); extent_addr_set(extent, addr); extent_bsize_set(extent, bsize); extent_slab_set(extent, false); diff --git a/src/arena.c b/src/arena.c index 1a3cf7b..231d668 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1566,7 +1566,8 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); @@ -1731,7 +1732,8 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); @@ -1767,7 +1769,9 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_t *arena = atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { diff --git a/src/ctl.c b/src/ctl.c index 3ec6ca2..2be2f32 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ 
-2612,7 +2612,8 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, if (extent == NULL) goto label_return; - arena = extent_arena_get(extent); + arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); if (arena == NULL) goto label_return; diff --git a/src/extent.c b/src/extent.c index 9237f90..aac5455 100644 --- a/src/extent.c +++ b/src/extent.c @@ -176,6 +176,32 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, return ret; } +static void +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu(&arena->offset_state, lg_range, + true); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); @@ -671,7 +697,7 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *extent) { - assert(extent_arena_get(extent) == arena); + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); @@ -689,7 +715,7 @@ extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *extent) { - assert(extent_arena_get(extent) == arena); + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extents_state_get(extents)); extents_remove_locked(tsdn, extents, extent); @@ -927,7 +953,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, */ extent_t *unlock_extent = extent; assert(extent_base_get(extent) == new_addr); - if (extent_arena_get(extent) != arena || + if (extent_arena_ind_get(extent) + != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != extents_state_get(extents)) { @@ -1172,7 +1199,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } assert(extent_state_get(extent) == extent_state_active); if (slab) { @@ -1342,8 +1369,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_hook_post_reentrancy(tsdn); } - extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES, - arena_extent_sn_next(arena), extent_state_active, zeroed, + extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -1434,7 +1461,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } if (slab) { rtree_ctx_t rtree_ctx_fallback; @@ -1513,11 +1540,11 @@ 
extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); return NULL; } - extent_init(extent, arena, addr, esize, slab, szind, + extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } if (extent_register(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); @@ -1559,8 +1586,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, const extent_t *outer) { - assert(extent_arena_get(inner) == arena); - if (extent_arena_get(outer) != arena) { + assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); + if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { return false; } @@ -2105,11 +2132,11 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, goto label_error_a; } - extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, slab_b, szind_b, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), - EXTENT_NOT_HEAD); + extent_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, + slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_dumpable_get(extent), EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -2117,7 +2144,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, { extent_t lead; - extent_init(&lead, arena, extent_addr_get(extent), size_a, + extent_init(&lead, arena_ind_get(arena), + extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_dumpable_get(extent), @@ -2304,7 +2332,12 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock2(tsdn, a, b); - extent_dalloc(tsdn, extent_arena_get(b), b); + /* + * If we got here, we merged the extents; so they must be from the same + * arena (i.e. this one). 
+ */ + assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + extent_dalloc(tsdn, arena, b); return false; } @@ -2384,7 +2417,8 @@ extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(*nfree <= *nregs); assert(*nfree * extent_usize_get(extent) <= *size); - const arena_t *arena = extent_arena_get(extent); + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = extent_binshard_get(extent); bin_t *bin = &arena->bins[szind].bin_shards[binshard]; diff --git a/src/extent_dss.c b/src/extent_dss.c index 8581789..eb07480 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -153,9 +153,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { - extent_init(gap, arena, gap_addr_page, - gap_size_page, false, SC_NSIZES, - arena_extent_sn_next(arena), + extent_init(gap, arena_ind_get(arena), + gap_addr_page, gap_size_page, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, false, true, true, EXTENT_NOT_HEAD); } @@ -198,7 +198,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, EXTENT_HOOKS_INITIALIZER; extent_t extent; - extent_init(&extent, arena, ret, size, + extent_init(&extent, + arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, true, EXTENT_NOT_HEAD); diff --git a/src/large.c b/src/large.c index 8e7a781..a5c2f9a 100644 --- a/src/large.c +++ b/src/large.c @@ -94,7 +94,8 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); @@ -130,7 +131,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { static bool large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - oldusize; @@ -230,14 +232,18 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Attempt to expand the allocation in-place. */ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, + atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize && large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } } @@ -247,14 +253,17 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. 
*/ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED)); return false; } /* Attempt to shrink the allocation in-place. */ if (oldusize > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } } @@ -348,17 +357,20 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, extent_arena_get(extent), extent, true); + large_dalloc_prep_impl(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent, true); } void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, extent_arena_get(extent), extent); + large_dalloc_finish_impl(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); large_dalloc_prep_impl(tsdn, arena, extent, false); large_dalloc_finish_impl(tsdn, arena, extent); arena_decay_tick(tsdn, arena); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 90adca1..9105e3e 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/rtree.h" +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + rtree_node_alloc_t *rtree_node_alloc_orig; rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_leaf_alloc_t *rtree_leaf_alloc_orig; @@ -85,10 +87,10 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; - extent_init(&extent_a, NULL, NULL, SC_LARGE_MINCLASS, false, - sz_size2index(SC_LARGE_MINCLASS), 0, + extent_init(&extent_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0, + extent_init(&extent_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -125,7 +127,7 @@ TEST_BEGIN(test_rtree_bits) { PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, + extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -166,7 +168,7 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_data_init(&rtree_ctx); extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, + extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index c56af25..bcc752e 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + TEST_BEGIN(test_arena_slab_regind) { szind_t binind; @@ -7,8 +9,9 @@ TEST_BEGIN(test_arena_slab_regind) { size_t regind; extent_t slab; const bin_info_t *bin_info = &bin_infos[binind]; - 
extent_init(&slab, NULL, mallocx(bin_info->slab_size, - MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, + extent_init(&slab, INVALID_ARENA_IND, + mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), + bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); assert_ptr_not_null(extent_addr_get(&slab), -- cgit v0.12 From 723ccc6c2757974112d31d254bcf74bf2beac6ec Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:18:41 -0700 Subject: Extents: Split out extent struct. --- Makefile.in | 1 + include/jemalloc/internal/base_structs.h | 1 + include/jemalloc/internal/bin.h | 45 +- include/jemalloc/internal/bin_info.h | 50 ++ include/jemalloc/internal/extent.h | 626 +++++++++++++++++++++ include/jemalloc/internal/extent_inlines.h | 428 -------------- include/jemalloc/internal/extent_structs.h | 177 ------ include/jemalloc/internal/extent_types.h | 6 - .../jemalloc/internal/jemalloc_internal_includes.h | 3 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/bin.c | 26 - src/bin_info.c | 30 + src/jemalloc.c | 6 +- 14 files changed, 715 insertions(+), 686 deletions(-) create mode 100644 include/jemalloc/internal/bin_info.h create mode 100644 include/jemalloc/internal/extent.h create mode 100644 src/bin_info.c diff --git a/Makefile.in b/Makefile.in index 7584f59..62ae71f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -99,6 +99,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ $(srcroot)src/bin.c \ + $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 07f214e..cc0f9a5 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H #define JEMALLOC_INTERNAL_BASE_STRUCTS_H +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 8547e89..70250a4 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" @@ -12,49 +13,6 @@ * A bin contains a set of extents that are currently being used for slab * allocations. */ - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -typedef struct bin_info_s bin_info_t; -struct bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. 
*/ - uint32_t nregs; - - /* Number of sharded bins in each arena for this size class. */ - uint32_t n_shards; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - -extern bin_info_t bin_infos[SC_NBINS]; - typedef struct bin_s bin_t; struct bin_s { /* All operations on bin_t fields require lock ownership. */ @@ -92,7 +50,6 @@ struct bins_s { void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); -void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); /* Initializes a bin to empty. Returns true on error. */ bool bin_init(bin_t *bin); diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h new file mode 100644 index 0000000..7fe65c8 --- /dev/null +++ b/include/jemalloc/internal/bin_info.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_BIN_INFO_H +#define JEMALLOC_INTERNAL_BIN_INFO_H + +#include "jemalloc/internal/bitmap.h" + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* Number of sharded bins in each arena for this size class. */ + uint32_t n_shards; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern bin_info_t bin_infos[SC_NBINS]; + +void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); + +#endif /* JEMALLOC_INTERNAL_BIN_INFO_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h new file mode 100644 index 0000000..92c34ae --- /dev/null +++ b/include/jemalloc/internal/extent.h @@ -0,0 +1,626 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_H +#define JEMALLOC_INTERNAL_EXTENT_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/slab_data.h" +#include "jemalloc/internal/sz.h" + +enum extent_state_e { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, + extent_state_retained = 3 +}; +typedef enum extent_state_e extent_state_t; + +enum extent_head_state_e { + EXTENT_NOT_HEAD, + EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ +}; +typedef enum extent_head_state_e extent_head_state_t; + +/* Extent (span of pages). Use accessor functions for e_* fields. 
*/ +typedef struct extent_s extent_t; +typedef ql_head(extent_t) extent_list_t; +typedef ph(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; +struct extent_s { + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * d: dumpable + * z: zeroed + * t: state + * i: szind + * f: nfree + * s: bin_shard + * n: sn + * + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). + * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to sz_large_pad or promotion of + * sampled small regions. + * + * nfree: Number of free regions in slab. + * + * bin_shard: the shard of the bin from which this extent came. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. 
+ */ + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_BINSHARD_WIDTH 6 +#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) + +#define EXTENT_BITS_IS_HEAD_WIDTH 1 +#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; + + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ + phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + slab_data_t e_slab_data; + + /* Profiling data, used for large objects. */ + struct { + /* Time when this was allocated. */ + nstime_t e_alloc_time; + /* Points to a prof_tctx_t. 
*/ + atomic_p_t e_prof_tctx; + }; + }; +}; + +static inline unsigned +extent_arena_ind_get(const extent_t *extent) { + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + assert(arena_ind < MALLOCX_ARENA_LIMIT); + + return arena_ind; +} + +static inline szind_t +extent_szind_get_maybe_invalid(const extent_t *extent) { + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= SC_NSIZES); + return szind; +} + +static inline szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < SC_NSIZES); /* Never call when "invalid". */ + return szind; +} + +static inline size_t +extent_usize_get(const extent_t *extent) { + return sz_index2size(extent_szind_get(extent)); +} + +static inline unsigned +extent_binshard_get(const extent_t *extent) { + unsigned binshard = (unsigned)((extent->e_bits & + EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + return binshard; +} + +static inline size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +static inline extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +static inline bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +static inline bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool +extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +static inline unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + +static inline void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +static inline void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +static inline size_t +extent_size_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +static inline size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +static inline size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; +} + +static inline void * +extent_before_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +} + +static inline void * +extent_last_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); +} + +static inline void * +extent_past_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); +} + +static inline slab_data_t * +extent_slab_data_get(extent_t *extent) { 
+ assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline const slab_data_t * +extent_slab_data_get_const(const extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline prof_tctx_t * +extent_prof_tctx_get(const extent_t *extent) { + return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline nstime_t +extent_prof_alloc_time_get(const extent_t *extent) { + return extent->e_alloc_time; +} + +static inline void +extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +} + +static inline void +extent_binshard_set(extent_t *extent, unsigned binshard) { + /* The assertion assumes szind is set already. */ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); +} + +static inline void +extent_addr_set(extent_t *extent, void *addr) { + extent->e_addr = addr; +} + +static inline void +extent_size_set(extent_t *extent, size_t size) { + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +static inline void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +static inline void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; +} + +static inline void +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); +} + +static inline void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { + /* The assertion assumes szind is set already. 
*/ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & + (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_sub(extent_t *extent, uint64_t n) { + assert(extent_slab_get(extent)); + extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_sn_set(extent_t *extent, size_t sn) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +} + +static inline void +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +} + +static inline void +extent_zeroed_set(extent_t *extent, bool zeroed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +} + +static inline void +extent_committed_set(extent_t *extent, bool committed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void +extent_slab_set(extent_t *extent, bool slab) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +} + +static inline void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { + atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +} + +static inline void +extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { + nstime_copy(&extent->e_alloc_time, &t); +} + +static inline bool +extent_is_head_get(extent_t *extent) { + if (maps_coalesce) { + not_reached(); + } + + return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> + EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_is_head_set(extent_t *extent, bool is_head) { + if (maps_coalesce) { + not_reached(); + } + + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed, bool dumpable, extent_head_state_t is_head) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_ind_set(extent, arena_ind); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); + extent_sn_set(extent, sn); + extent_state_set(extent, state); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); + ql_elm_new(extent, ql_link); + if (!maps_coalesce) { + extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : + false); + } + if (config_prof) { + extent_prof_tctx_set(extent, NULL); + } +} + +static inline void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, SC_NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); + extent_dumpable_set(extent, true); +} + +static inline void +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +static inline extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +static inline extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); +} + +static inline void +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + +static inline void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); +} + +static inline int +extent_sn_comp(const extent_t *a, const extent_t *b) { + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return (a_sn > b_sn) - (a_sn < b_sn); +} + +static inline int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + +static inline int +extent_ad_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return (a_addr > b_addr) - (a_addr < b_addr); +} + +static inline int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + +static inline int +extent_snad_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ad_comp(a, b); + return ret; +} + +static inline int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} + +#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 95be084..2647df8 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -35,432 +35,4 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { (uintptr_t)extent2); } -static inline unsigned -extent_arena_ind_get(const extent_t *extent) { - unsigned arena_ind = (unsigned)((extent->e_bits & - EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); - assert(arena_ind < MALLOCX_ARENA_LIMIT); - - return arena_ind; -} - -static inline szind_t -extent_szind_get_maybe_invalid(const extent_t *extent) { - szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> - EXTENT_BITS_SZIND_SHIFT); - assert(szind <= SC_NSIZES); - return 
szind; -} - -static inline szind_t -extent_szind_get(const extent_t *extent) { - szind_t szind = extent_szind_get_maybe_invalid(extent); - assert(szind < SC_NSIZES); /* Never call when "invalid". */ - return szind; -} - -static inline size_t -extent_usize_get(const extent_t *extent) { - return sz_index2size(extent_szind_get(extent)); -} - -static inline unsigned -extent_binshard_get(const extent_t *extent) { - unsigned binshard = (unsigned)((extent->e_bits & - EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - return binshard; -} - -static inline size_t -extent_sn_get(const extent_t *extent) { - return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> - EXTENT_BITS_SN_SHIFT); -} - -static inline extent_state_t -extent_state_get(const extent_t *extent) { - return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> - EXTENT_BITS_STATE_SHIFT); -} - -static inline bool -extent_zeroed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> - EXTENT_BITS_ZEROED_SHIFT); -} - -static inline bool -extent_committed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> - EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline bool -extent_dumpable_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> - EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline bool -extent_slab_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> - EXTENT_BITS_SLAB_SHIFT); -} - -static inline unsigned -extent_nfree_get(const extent_t *extent) { - assert(extent_slab_get(extent)); - return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> - EXTENT_BITS_NFREE_SHIFT); -} - -static inline void * -extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return PAGE_ADDR2BASE(extent->e_addr); -} - -static inline void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return extent->e_addr; -} - -static inline size_t -extent_size_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_SIZE_MASK); -} - -static inline size_t -extent_esn_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_ESN_MASK); -} - -static inline size_t -extent_bsize_get(const extent_t *extent) { - return extent->e_bsize; -} - -static inline void * -extent_before_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) - PAGE); -} - -static inline void * -extent_last_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE); -} - -static inline void * -extent_past_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent)); -} - -static inline slab_data_t * -extent_slab_data_get(extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline const slab_data_t * -extent_slab_data_get_const(const extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, - ATOMIC_ACQUIRE); -} - -static inline nstime_t -extent_prof_alloc_time_get(const extent_t *extent) { - return extent->e_alloc_time; -} - -static 
inline void -extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); -} - -static inline void -extent_binshard_set(extent_t *extent, unsigned binshard) { - /* The assertion assumes szind is set already. */ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); -} - -static inline void -extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; -} - -static inline void -extent_size_set(extent_t *extent, size_t size) { - assert((size & ~EXTENT_SIZE_MASK) == 0); - extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); -} - -static inline void -extent_esn_set(extent_t *extent, size_t esn) { - extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & - EXTENT_ESN_MASK); -} - -static inline void -extent_bsize_set(extent_t *extent, size_t bsize) { - extent->e_bsize = bsize; -} - -static inline void -extent_szind_set(extent_t *extent, szind_t szind) { - assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | - ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); -} - -static inline void -extent_nfree_set(extent_t *extent, unsigned nfree) { - assert(extent_slab_get(extent)); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { - /* The assertion assumes szind is set already. */ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & - (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_inc(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_dec(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_sub(extent_t *extent, uint64_t n) { - assert(extent_slab_get(extent)); - extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_sn_set(extent_t *extent, size_t sn) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | - ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); -} - -static inline void -extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | - ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); -} - -static inline void -extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); -} - -static inline void -extent_committed_set(extent_t *extent, bool committed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline void -extent_dumpable_set(extent_t *extent, bool dumpable) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline void -extent_slab_set(extent_t *extent, bool slab) { - extent->e_bits = (extent->e_bits & 
~EXTENT_BITS_SLAB_MASK) | - ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); -} - -static inline void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); -} - -static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { - nstime_copy(&extent->e_alloc_time, &t); -} - -static inline bool -extent_is_head_get(extent_t *extent) { - if (maps_coalesce) { - not_reached(); - } - - return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> - EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_is_head_set(extent_t *extent, bool is_head) { - if (maps_coalesce) { - not_reached(); - } - - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable, extent_head_state_t is_head) { - assert(addr == PAGE_ADDR2BASE(addr) || !slab); - - extent_arena_ind_set(extent, arena_ind); - extent_addr_set(extent, addr); - extent_size_set(extent, size); - extent_slab_set(extent, slab); - extent_szind_set(extent, szind); - extent_sn_set(extent, sn); - extent_state_set(extent, state); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_dumpable_set(extent, dumpable); - ql_elm_new(extent, ql_link); - if (!maps_coalesce) { - extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true : - false); - } - if (config_prof) { - extent_prof_tctx_set(extent, NULL); - } -} - -static inline void -extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); - extent_addr_set(extent, addr); - extent_bsize_set(extent, bsize); - extent_slab_set(extent, false); - extent_szind_set(extent, SC_NSIZES); - extent_sn_set(extent, sn); - extent_state_set(extent, extent_state_active); - extent_zeroed_set(extent, true); - extent_committed_set(extent, true); - extent_dumpable_set(extent, true); -} - -static inline void -extent_list_init(extent_list_t *list) { - ql_new(list); -} - -static inline extent_t * -extent_list_first(const extent_list_t *list) { - return ql_first(list); -} - -static inline extent_t * -extent_list_last(const extent_list_t *list) { - return ql_last(list, ql_link); -} - -static inline void -extent_list_append(extent_list_t *list, extent_t *extent) { - ql_tail_insert(list, extent, ql_link); -} - -static inline void -extent_list_prepend(extent_list_t *list, extent_t *extent) { - ql_head_insert(list, extent, ql_link); -} - -static inline void -extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert) { - ql_after_insert(to_remove, to_insert, ql_link); - ql_remove(list, to_remove, ql_link); -} - -static inline void -extent_list_remove(extent_list_t *list, extent_t *extent) { - ql_remove(list, extent, ql_link); -} - -static inline int -extent_sn_comp(const extent_t *a, const extent_t *b) { - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); - - return (a_sn > b_sn) - (a_sn < b_sn); -} - -static inline int -extent_esn_comp(const extent_t *a, const extent_t *b) { - size_t a_esn = extent_esn_get(a); - size_t b_esn = extent_esn_get(b); - - return (a_esn > b_esn) - (a_esn < b_esn); -} - -static inline int -extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = 
(uintptr_t)extent_addr_get(b); - - return (a_addr > b_addr) - (a_addr < b_addr); -} - -static inline int -extent_ead_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_eaddr = (uintptr_t)a; - uintptr_t b_eaddr = (uintptr_t)b; - - return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); -} - -static inline int -extent_snad_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_sn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ad_comp(a, b); - return ret; -} - -static inline int -extent_esnead_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_esn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ead_comp(a, b); - return ret; -} - #endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 827bd3b..108ac40 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" @@ -10,182 +9,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" -typedef enum { - extent_state_active = 0, - extent_state_dirty = 1, - extent_state_muzzy = 2, - extent_state_retained = 3 -} extent_state_t; - -/* Extent (span of pages). Use accessor functions for e_* fields. */ -struct extent_s { - /* - * Bitfield containing several fields: - * - * a: arena_ind - * b: slab - * c: committed - * d: dumpable - * z: zeroed - * t: state - * i: szind - * f: nfree - * s: bin_shard - * n: sn - * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa - * - * arena_ind: Arena from which this extent came, or all 1 bits if - * unassociated. - * - * slab: The slab flag indicates whether the extent is used for a slab - * of small regions. This helps differentiate small size classes, - * and it indicates whether interior pointers can be looked up via - * iealloc(). - * - * committed: The committed flag indicates whether physical memory is - * committed to the extent, whether explicitly or implicitly - * as on a system that overcommits and satisfies physical - * memory needs on demand via soft page faults. - * - * dumpable: The dumpable flag indicates whether or not we've set the - * memory in question to be dumpable. Note that this - * interacts somewhat subtly with user-specified extent hooks, - * since we don't know if *they* are fiddling with - * dumpability (in which case, we don't want to undo whatever - * they're doing). To deal with this scenario, we: - * - Make dumpable false only for memory allocated with the - * default hooks. - * - Only allow memory to go from non-dumpable to dumpable, - * and only once. - * - Never make the OS call to allow dumping when the - * dumpable bit is already set. - * These three constraints mean that we will never - * accidentally dump user memory that the user meant to set - * nondumpable with their extent hooks. - * - * - * zeroed: The zeroed flag is used by extent recycling code to track - * whether memory is zero-filled. - * - * state: The state flag is an extent_state_t. - * - * szind: The szind flag indicates usable size class index for - * allocations residing in this extent, regardless of whether the - * extent is a slab. 
Extent size and usable size often differ - * even for non-slabs, either due to sz_large_pad or promotion of - * sampled small regions. - * - * nfree: Number of free regions in slab. - * - * bin_shard: the shard of the bin from which this extent came. - * - * sn: Serial number (potentially non-unique). - * - * Serial numbers may wrap around if !opt_retain, but as long as - * comparison functions fall back on address comparison for equal - * serial numbers, stable (if imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of - * wrap-around, e.g. when splitting an extent and assigning the same - * serial number to both resulting adjacent extents. - */ - uint64_t e_bits; -#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) - -#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) - -#define EXTENT_BITS_SLAB_WIDTH 1 -#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) - -#define EXTENT_BITS_COMMITTED_WIDTH 1 -#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) - -#define EXTENT_BITS_DUMPABLE_WIDTH 1 -#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) - -#define EXTENT_BITS_ZEROED_WIDTH 1 -#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) - -#define EXTENT_BITS_STATE_WIDTH 2 -#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) - -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) - -#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) - -#define EXTENT_BITS_BINSHARD_WIDTH 6 -#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) - -#define EXTENT_BITS_IS_HEAD_WIDTH 1 -#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) - -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) - - /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; - - union { - /* - * Extent size and serial number associated with the extent - * structure (different than the serial number for the extent at - * e_addr). - * - * ssssssss [...] 
ssssssss ssssnnnn nnnnnnnn - */ - size_t e_size_esn; - #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) - #define EXTENT_ESN_MASK ((size_t)PAGE-1) - /* Base extent size, which may not be a multiple of PAGE. */ - size_t e_bsize; - }; - - /* - * List linkage, used by a variety of lists: - * - bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - - /* - * Linkage for per size class sn/address-ordered heaps, and - * for extent_avail - */ - phn(extent_t) ph_link; - - union { - /* Small region slab metadata. */ - slab_data_t e_slab_data; - - /* Profiling data, used for large objects. */ - struct { - /* Time when this was allocated. */ - nstime_t e_alloc_time; - /* Points to a prof_tctx_t. */ - atomic_p_t e_prof_tctx; - }; - }; -}; -typedef ql_head(extent_t) extent_list_t; -typedef ph(extent_t) extent_tree_t; -typedef ph(extent_t) extent_heap_t; - /* Quantized collection of extents, with built-in LRU queue. */ struct extents_s { malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 96925cf..a56410a 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H #define JEMALLOC_INTERNAL_EXTENT_TYPES_H -typedef struct extent_s extent_t; typedef struct extents_s extents_t; typedef struct extent_util_stats_s extent_util_stats_t; @@ -15,9 +14,4 @@ typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; */ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 -typedef enum { - EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ -} extent_head_state_t; - #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 55fcf3e..6755b43 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -50,10 +50,9 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/extent_structs.h" -#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ed0e7b9..b6b8339 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -39,6 +39,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 1606a3a..f405ea3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -39,6 +39,7 @@ + diff --git a/src/bin.c b/src/bin.c index bca6b12..d7cbfb5 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,26 +6,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -bin_info_t bin_infos[SC_NBINS]; - -static void -bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bin_info_t bin_infos[SC_NBINS]) { - for (unsigned i = 0; i < SC_NBINS; i++) { - bin_info_t *bin_info = &bin_infos[i]; - sc_t *sc = &sc_data->sc[i]; - bin_info->reg_size = ((size_t)1U << sc->lg_base) - + 
((size_t)sc->ndelta << sc->lg_delta); - bin_info->slab_size = (sc->pgs << LG_PAGE); - bin_info->nregs = - (uint32_t)(bin_info->slab_size / bin_info->reg_size); - bin_info->n_shards = bin_shard_sizes[i]; - bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( - bin_info->nregs); - bin_info->bitmap_info = bitmap_info; - } -} - bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -58,12 +38,6 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } } -void -bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { - assert(sc_data->initialized); - bin_infos_init(sc_data, bin_shard_sizes, bin_infos); -} - bool bin_init(bin_t *bin) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, diff --git a/src/bin_info.c b/src/bin_info.c new file mode 100644 index 0000000..20b93ea --- /dev/null +++ b/src/bin_info.c @@ -0,0 +1,30 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin_info.h" + +bin_info_t bin_infos[SC_NBINS]; + +static void +bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bin_info_t bin_infos[SC_NBINS]) { + for (unsigned i = 0; i < SC_NBINS; i++) { + bin_info_t *bin_info = &bin_infos[i]; + sc_t *sc = &sc_data->sc[i]; + bin_info->reg_size = ((size_t)1U << sc->lg_base) + + ((size_t)sc->ndelta << sc->lg_delta); + bin_info->slab_size = (sc->pgs << LG_PAGE); + bin_info->nregs = + (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->n_shards = bin_shard_sizes[i]; + bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( + bin_info->nregs); + bin_info->bitmap_info = bitmap_info; + } +} + +void +bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { + assert(sc_data->initialized); + bin_infos_init(sc_data, bin_shard_sizes, bin_infos); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 753fcbe..fc7d289 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1488,8 +1488,8 @@ malloc_init_hard_a0_locked() { * Ordering here is somewhat tricky; we need sc_boot() first, since that * determines what the size classes will be, and then * malloc_conf_init(), since any slab size tweaking will need to be done - * before sz_boot and bin_boot, which assume that the values they read - * out of sc_data_global are final. + * before sz_boot and bin_info_boot, which assume that the values they + * read out of sc_data_global are final. */ sc_boot(&sc_data); unsigned bin_shard_sizes[SC_NBINS]; @@ -1504,7 +1504,7 @@ malloc_init_hard_a0_locked() { } malloc_conf_init(&sc_data, bin_shard_sizes); sz_boot(&sc_data); - bin_boot(&sc_data, bin_shard_sizes); + bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { /* Print statistics at exit. */ -- cgit v0.12 From 4e5e43f22eead4d1e3fcb4422410e0100b9d8448 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 19:59:55 -0700 Subject: Rename extents_t -> eset_t. 
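The quantized extent collection formerly declared as struct extents_s / extents_t in extent_structs.h moves into its own header, include/jemalloc/internal/eset.h, and is renamed to eset_t; the extents_t parameters throughout arena.c, background_thread.c, and extent.c are retyped to match. The extents_* function names are left unchanged for now, so call sites differ only in the pointer type they hold. Below is a minimal sketch of the effect on a call site, assuming the jemalloc internal headers are in scope; it is illustrative only, and `a` stands for some arena_t pointer rather than code taken from this patch.

    /* Hypothetical call site after this change: arena fields such as
     * extents_dirty are now eset_t (previously extents_t), while the
     * accessors keep their extents_ prefix and simply take eset_t *. */
    eset_t *dirty = &a->extents_dirty;
    size_t dirty_npages = extents_npages_get(dirty);
    extent_state_t dirty_state = extents_state_get(dirty);

The struct's fields (mtx, heaps, nextents, nbytes, bitmap, lru, npages, state, delay_coalesce) are carried over unchanged; only the type name and header location change.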
--- include/jemalloc/internal/arena_structs.h | 7 +- include/jemalloc/internal/eset.h | 60 ++++++ include/jemalloc/internal/extent_externs.h | 22 +- include/jemalloc/internal/extent_structs.h | 47 ---- include/jemalloc/internal/extent_types.h | 2 - src/arena.c | 50 ++--- src/background_thread.c | 4 +- src/extent.c | 332 ++++++++++++++--------------- 8 files changed, 268 insertions(+), 256 deletions(-) create mode 100644 include/jemalloc/internal/eset.h diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index eeab57f..9563c3d 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -161,9 +162,9 @@ struct arena_s { * * Synchronization: internal. */ - extents_t extents_dirty; - extents_t extents_muzzy; - extents_t extents_retained; + eset_t extents_dirty; + eset_t extents_muzzy; + eset_t extents_retained; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h new file mode 100644 index 0000000..1c18f4e --- /dev/null +++ b/include/jemalloc/internal/eset.h @@ -0,0 +1,60 @@ +#ifndef JEMALLOC_INTERNAL_ESET_H +#define JEMALLOC_INTERNAL_ESET_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/mutex.h" + +/* + * An eset ("extent set") is a quantized collection of extents, with built-in + * LRU queue. + */ +typedef struct eset_s eset_t; +struct eset_s { + malloc_mutex_t mtx; + + /* + * Quantized per size class heaps of extents. + * + * Synchronization: mtx. + */ + extent_heap_t heaps[SC_NPSIZES + 1]; + atomic_zu_t nextents[SC_NPSIZES + 1]; + atomic_zu_t nbytes[SC_NPSIZES + 1]; + + /* + * Bitmap for which set bits correspond to non-empty heaps. + * + * Synchronization: mtx. + */ + bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; + + /* + * LRU of all extents in heaps. + * + * Synchronization: mtx. + */ + extent_list_t lru; + + /* + * Page sum for all extents in heaps. + * + * The synchronization here is a little tricky. Modifications to npages + * must hold mtx, but reads need not (though, a reader who sees npages + * without holding the mutex can't assume anything about the rest of the + * state of the eset_t). + */ + atomic_zu_t npages; + + /* All stored extents must be in the same state. */ + extent_state_t state; + + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. 
+ */ + bool delay_coalesce; +}; + +#endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 8aba576..45271d7 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,25 +27,25 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, +bool extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); -extent_state_t extents_state_get(const extents_t *extents); -size_t extents_npages_get(extents_t *extents); +extent_state_t extents_state_get(const eset_t *eset); +size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. */ -size_t extents_nextents_get(extents_t *extents, pszind_t ind); +size_t extents_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. */ -size_t extents_nbytes_get(extents_t *extents, pszind_t ind); +size_t extents_nbytes_get(eset_t *eset, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); + extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); -void extents_prefork(tsdn_t *tsdn, extents_t *extents); -void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); -void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); + extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); +void extents_prefork(tsdn_t *tsdn, eset_t *eset); +void extents_postfork_parent(tsdn_t *tsdn, eset_t *eset); +void extents_postfork_child(tsdn_t *tsdn, eset_t *eset); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 108ac40..4e6e085 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -9,53 +9,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" -/* Quantized collection of extents, with built-in LRU queue. */ -struct extents_s { - malloc_mutex_t mtx; - - /* - * Quantized per size class heaps of extents. - * - * Synchronization: mtx. - */ - extent_heap_t heaps[SC_NPSIZES + 1]; - atomic_zu_t nextents[SC_NPSIZES + 1]; - atomic_zu_t nbytes[SC_NPSIZES + 1]; - - /* - * Bitmap for which set bits correspond to non-empty heaps. - * - * Synchronization: mtx. - */ - bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; - - /* - * LRU of all extents in heaps. - * - * Synchronization: mtx. - */ - extent_list_t lru; - - /* - * Page sum for all extents in heaps. - * - * The synchronization here is a little tricky. 
Modifications to npages - * must hold mtx, but reads need not (though, a reader who sees npages - * without holding the mutex can't assume anything about the rest of the - * state of the extents_t). - */ - atomic_zu_t npages; - - /* All stored extents must be in the same state. */ - extent_state_t state; - - /* - * If true, delay coalescing until eviction; otherwise coalesce during - * deallocation. - */ - bool delay_coalesce; -}; - /* * The following two structs are for experimental purposes. See * experimental_utilization_query_ctl and diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index a56410a..02d7b2c 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -1,8 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H #define JEMALLOC_INTERNAL_EXTENT_TYPES_H -typedef struct extents_s extents_t; - typedef struct extent_util_stats_s extent_util_stats_t; typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; diff --git a/src/arena.c b/src/arena.c index 231d668..5380dee 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,7 +55,7 @@ static unsigned huge_arena_ind; */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, + arena_decay_t *decay, eset_t *eset, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); @@ -609,10 +609,10 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, size_t current_npages, size_t npages_limit, + eset_t *eset, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, + arena_decay_to_limit(tsdn, arena, decay, eset, false, npages_limit, current_npages - npages_limit, is_background_thread); } @@ -644,8 +644,8 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time, bool is_background_thread) { - size_t current_npages = extents_npages_get(extents); + eset_t *eset, const nstime_t *time, bool is_background_thread) { + size_t current_npages = extents_npages_get(eset); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -654,7 +654,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, + arena_decay_try_purge(tsdn, arena, decay, eset, current_npages, npages_limit, is_background_thread); } } @@ -712,15 +712,15 @@ arena_decay_ms_valid(ssize_t decay_ms) { static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread) { + eset_t *eset, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, extents_npages_get(extents), + arena_decay_to_limit(tsdn, arena, decay, eset, false, + 0, extents_npages_get(eset), is_background_thread); } return false; @@ -756,11 +756,11 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, + arena_decay_epoch_advance(tsdn, arena, decay, eset, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, - extents_npages_get(extents), + arena_decay_try_purge(tsdn, arena, decay, eset, + extents_npages_get(eset), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -785,7 +785,7 @@ arena_muzzy_decay_ms_get(arena_t *arena) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, ssize_t decay_ms) { + eset_t *eset, ssize_t decay_ms) { if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -800,7 +800,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. */ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, extents, false); + arena_maybe_decay(tsdn, arena, decay, eset, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -822,7 +822,7 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, + extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_limit, size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -831,7 +831,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + (extent = extents_evict(tsdn, arena, r_extent_hooks, eset, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -841,7 +841,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, + extent_hooks_t **r_extent_hooks, arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -861,7 +861,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, size_t npages = extent_size_get(extent) >> LG_PAGE; npurged += npages; extent_list_remove(decay_extents, extent); - switch (extents_state_get(extents)) { + switch (extents_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -914,7 +914,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, + eset_t *eset, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -931,11 +931,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t 
*arena, arena_decay_t *decay, extent_list_t decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, + size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, extents, all, &decay_extents, + &extent_hooks, decay, eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -946,11 +946,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread, bool all) { + eset_t *eset, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - extents_npages_get(extents), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, + extents_npages_get(eset), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -961,7 +961,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, eset, is_background_thread); size_t npages_new; if (epoch_advanced) { diff --git a/src/background_thread.c b/src/background_thread.c index bea445f..f4b9cef 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -114,7 +114,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { static uint64_t arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, - extents_t *extents) { + eset_t *eset) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. 
*/ return BACKGROUND_THREAD_MIN_INTERVAL_NS; @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = extents_npages_get(extents); + size_t npages = extents_npages_get(eset); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { diff --git a/src/extent.c b/src/extent.c index aac5455..d535014 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,7 +19,7 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static const bitmap_info_t extents_bitmap_info = +static const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, @@ -101,14 +101,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, + extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ @@ -309,118 +309,118 @@ extent_size_quantize_ceil(size_t size) { ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) bool -extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, +extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce) { - if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS, + if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, malloc_mutex_rank_exclusive)) { return true; } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&extents->heaps[i]); + extent_heap_new(&eset->heaps[i]); } - bitmap_init(extents->bitmap, &extents_bitmap_info, true); - extent_list_init(&extents->lru); - atomic_store_zu(&extents->npages, 0, ATOMIC_RELAXED); - extents->state = state; - extents->delay_coalesce = delay_coalesce; + bitmap_init(eset->bitmap, &eset_bitmap_info, true); + extent_list_init(&eset->lru); + atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); + eset->state = state; + eset->delay_coalesce = delay_coalesce; return false; } extent_state_t -extents_state_get(const extents_t *extents) { - return extents->state; +extents_state_get(const eset_t *eset) { + return eset->state; } size_t -extents_npages_get(extents_t *extents) { - return atomic_load_zu(&extents->npages, ATOMIC_RELAXED); +extents_npages_get(eset_t *eset) { + return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); } size_t -extents_nextents_get(extents_t *extents, pszind_t pind) { - return atomic_load_zu(&extents->nextents[pind], ATOMIC_RELAXED); +extents_nextents_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); } size_t -extents_nbytes_get(extents_t 
*extents, pszind_t pind) { - return atomic_load_zu(&extents->nbytes[pind], ATOMIC_RELAXED); +extents_nbytes_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); } static void -extents_stats_add(extents_t *extent, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nbytes[pind], cur + sz, ATOMIC_RELAXED); +extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); } static void -extents_stats_sub(extents_t *extent, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nbytes[pind], cur - sz, ATOMIC_RELAXED); +extents_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - assert(extent_state_get(extent) == extents->state); +extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&extents->heaps[pind])) { - bitmap_unset(extents->bitmap, &extents_bitmap_info, + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_unset(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_heap_insert(&extents->heaps[pind], extent); + extent_heap_insert(&eset->heaps[pind], extent); if (config_stats) { - extents_stats_add(extents, pind, size); + extents_stats_add(eset, pind, size); } - extent_list_append(&extents->lru, extent); + extent_list_append(&eset->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we * don't need an atomic fetch-add; we can get by with a load followed by * a store. 
*/ - size_t cur_extents_npages = - atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - atomic_store_zu(&extents->npages, cur_extents_npages + npages, + size_t cur_eset_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_eset_npages + npages, ATOMIC_RELAXED); } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - assert(extent_state_get(extent) == extents->state); +extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&extents->heaps[pind], extent); + extent_heap_remove(&eset->heaps[pind], extent); if (config_stats) { - extents_stats_sub(extents, pind, size); + extents_stats_sub(eset, pind, size); } - if (extent_heap_empty(&extents->heaps[pind])) { - bitmap_set(extents->bitmap, &extents_bitmap_info, + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_list_remove(&extents->lru, extent); + extent_list_remove(&eset->lru, extent); size_t npages = size >> LG_PAGE; /* - * As in extents_insert_locked, we hold extents->mtx and so don't need - * atomic operations for updating extents->npages. + * As in extents_insert_locked, we hold eset->mtx and so don't need + * atomic operations for updating eset->npages. */ size_t cur_extents_npages = - atomic_load_zu(&extents->npages, ATOMIC_RELAXED); + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); - atomic_store_zu(&extents->npages, + atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } @@ -429,18 +429,18 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { * requirement. For each size, try only the first extent in the heap. */ static extent_t * -extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, +extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); - for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); uintptr_t base = (uintptr_t)extent_base_get(extent); size_t candidate_size = extent_size_get(extent); assert(candidate_size >= min_size); @@ -466,7 +466,7 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, * large enough. */ static extent_t * -extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t size) { extent_t *ret = NULL; @@ -477,25 +477,25 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, * No split / merge allowed (Windows w/o retain). 
Try exact fit * only. */ - return extent_heap_empty(&extents->heaps[pind]) ? NULL : - extent_heap_first(&extents->heaps[pind]); + return extent_heap_empty(&eset->heaps[pind]) ? NULL : + extent_heap_first(&eset->heaps[pind]); } - for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); assert(extent_size_get(extent) >= size); /* * In order to reduce fragmentation, avoid reusing and splitting - * large extents for much smaller sizes. + * large eset for much smaller sizes. * - * Only do check for dirty extents (delay_coalesce). + * Only do check for dirty eset (delay_coalesce). */ - if (extents->delay_coalesce && + if (eset->delay_coalesce && (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } @@ -513,12 +513,12 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, /* * Do first-fit extent selection, where the selection policy choice is - * based on extents->delay_coalesce. + * based on eset->delay_coalesce. */ static extent_t * -extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extents_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); + malloc_mutex_assert_owner(tsdn, &eset->mtx); size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -527,7 +527,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, } extent_t *extent = - extents_first_fit_locked(tsdn, arena, extents, max_size); + extents_first_fit_locked(tsdn, arena, eset, max_size); if (alignment > PAGE && extent == NULL) { /* @@ -535,7 +535,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, * pessimistic. Next we try to satisfy the aligned allocation * with sizes in [esize, max_size). 
*/ - extent = extents_fit_alignment(extents, esize, max_size, + extent = extents_fit_alignment(eset, esize, max_size, alignment); } @@ -544,31 +544,31 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, &coalesced, false); - extent_state_set(extent, extents_state_get(extents)); + eset, extent, &coalesced, false); + extent_state_set(extent, extents_state_get(eset)); if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent); + extents_insert_locked(tsdn, eset, extent); return false; } extent_t * extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t size, size_t pad, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; @@ -576,7 +576,7 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent) { + eset_t *eset, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); assert(extent_dumpable_get(extent)); @@ -586,16 +586,16 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, extents, extent, false); + extent_record(tsdn, arena, r_extent_hooks, eset, extent, false); } extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min) { + eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); /* * Get the LRU coalesced extent, if any. If coalescing was delayed, @@ -604,24 +604,24 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent; while (true) { /* Get the LRU extent, if any. */ - extent = extent_list_first(&extents->lru); + extent = extent_list_first(&eset->lru); if (extent == NULL) { goto label_return; } /* Check the eviction limit. */ - size_t extents_npages = atomic_load_zu(&extents->npages, + size_t extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); if (extents_npages <= npages_min) { extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent); - if (!extents->delay_coalesce) { + extents_remove_locked(tsdn, eset, extent); + if (!eset->delay_coalesce) { break; } /* Try to coalesce. 
*/ if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent)) { + rtree_ctx, eset, extent)) { break; } /* @@ -634,7 +634,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (extents_state_get(extents)) { + switch (extents_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -649,7 +649,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } label_return: - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); return extent; } @@ -659,7 +659,7 @@ label_return: */ static void extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent, bool growing_retained) { + eset_t *eset, extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -668,7 +668,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ - if (extents_state_get(extents) == extent_state_dirty) { + if (extents_state_get(eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, @@ -680,45 +680,45 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks } void -extents_prefork(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_prefork(tsdn, &extents->mtx); +extents_prefork(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_prefork(tsdn, &eset->mtx); } void -extents_postfork_parent(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_postfork_parent(tsdn, &extents->mtx); +extents_postfork_parent(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_parent(tsdn, &eset->mtx); } void -extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_postfork_child(tsdn, &extents->mtx); +extents_postfork_child(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_child(tsdn, &eset->mtx); } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); - extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent); + extent_state_set(extent, extents_state_get(eset)); + extents_insert_locked(tsdn, eset, extent); } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { - malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); + extent_deactivate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extents_state_get(extents)); + 
assert(extent_state_get(extent) == extents_state_get(eset)); - extents_remove_locked(tsdn, extents, extent); + extents_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); } @@ -911,12 +911,12 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { } /* - * Tries to find and remove an extent from extents that can be used for the + * Tries to find and remove an extent from eset that can be used for the * given allocation request. */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -940,7 +940,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { @@ -957,22 +957,22 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != - extents_state_get(extents)) { + extents_state_get(eset)) { extent = NULL; } extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, esize, + extent = extents_fit_locked(tsdn, arena, eset, esize, alignment); } if (extent == NULL) { - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); return NULL; } - extent_activate_locked(tsdn, arena, extents, extent); - malloc_mutex_unlock(tsdn, &extents->mtx); + extent_activate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); return extent; } @@ -981,7 +981,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, * Given an allocation request and an extent guaranteed to be able to satisfy * it, this splits off lead and trail extents, leaving extent pointing to an * extent satisfying the allocation. - * This function doesn't put lead or trail into any extents_t; it's the caller's + * This function doesn't put lead or trail into any eset_t; it's the caller's * job to ensure that they can be reused. */ typedef enum { @@ -1078,11 +1078,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, * This fulfills the indicated allocation request out of the given extent (which * the caller should have ensured was big enough). If there's any unused space * before or after the resulting allocation, that space is given its own extent - * and put back into extents. + * and put back into eset. */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, bool growing_retained) { extent_t *lead; @@ -1099,19 +1099,19 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, && !opt_retain) { /* * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the extents. + * leaking the eset. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, extents, to_leak); + extent_deactivate(tsdn, arena, eset, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, extents, lead); + extent_deactivate(tsdn, arena, eset, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, extents, trail); + extent_deactivate(tsdn, arena, eset, trail); } return extent; } else { @@ -1126,7 +1126,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, r_extent_hooks, extents, + extents_abandon_vm(tsdn, arena, r_extent_hooks, eset, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -1149,11 +1149,11 @@ extent_need_manual_zero(arena_t *arena) { /* * Tries to satisfy the given allocation request by reusing one of the extents - * in the given extents_t. + * in the given eset_t. */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t size, size_t pad, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1166,14 +1166,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, + rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, new_addr, size, pad, alignment, slab, szind, extent, + eset, new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { return NULL; @@ -1182,7 +1182,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*commit && !extent_committed_get(extent)) { if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, r_extent_hooks, extents, + extent_record(tsdn, arena, r_extent_hooks, eset, extent, growing_retained); return NULL; } @@ -1584,7 +1584,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, } static bool -extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, +extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, const extent_t *outer) { assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { @@ -1592,7 +1592,7 @@ extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, } assert(extent_state_get(inner) == extent_state_active); - if (extent_state_get(outer) != extents->state) { + if (extent_state_get(outer) != eset->state) { return false; } @@ -1605,19 +1605,19 @@ extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *inner, extent_t *outer, bool forward, + eset_t *eset, extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { - 
assert(extent_can_coalesce(arena, extents, inner, outer)); + assert(extent_can_coalesce(arena, eset, inner, outer)); - extent_activate_locked(tsdn, arena, extents, outer); + extent_activate_locked(tsdn, arena, eset, outer); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, extents, outer); + extent_deactivate_locked(tsdn, arena, eset, outer); } return err; @@ -1625,7 +1625,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -1646,19 +1646,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_past_get(extent), inactive_only); if (next != NULL) { /* - * extents->mtx only protects against races for - * like-state extents, so call extent_can_coalesce() + * eset->mtx only protects against races for + * like-state eset, so call extent_can_coalesce() * before releasing next's pool lock. */ - bool can_coalesce = extent_can_coalesce(arena, extents, + bool can_coalesce = extent_can_coalesce(arena, eset, extent, next); extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, next, true, + r_extent_hooks, eset, extent, next, true, growing_retained)) { - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return extent; @@ -1671,15 +1671,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, extent_before_get(extent), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, extents, + bool can_coalesce = extent_can_coalesce(arena, eset, extent, prev); extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, prev, false, + r_extent_hooks, eset, extent, prev, false, growing_retained)) { extent = prev; - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; return extent; @@ -1689,7 +1689,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, } } while (again); - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { *coalesced = false; } return extent; @@ -1697,35 +1697,35 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, coalesced, growing_retained, false); + eset, extent, coalesced, growing_retained, false); } static extent_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, coalesced, growing_retained, true); + eset, extent, coalesced, growing_retained, true); } /* * Does the metadata management portions of putting an unused extent into the - * given extents_t (coalesces, deregisters slab interiors, the heap operations). + * given eset_t (coalesces, deregisters slab interiors, the heap operations). */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent, bool growing_retained) { + eset_t *eset, extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((extents_state_get(extents) != extent_state_dirty && - extents_state_get(extents) != extent_state_muzzy) || + assert((extents_state_get(eset) != extent_state_dirty && + extents_state_get(eset) != extent_state_muzzy) || !extent_zeroed_get(extent)); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_szind_set(extent, SC_NSIZES); @@ -1737,29 +1737,29 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), true) == extent); - if (!extents->delay_coalesce) { + if (!eset->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent, NULL, growing_retained); + rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(extents == &arena->extents_dirty); - /* Always coalesce large extents eagerly. */ + assert(eset == &arena->extents_dirty); + /* Always coalesce large eset eagerly. */ bool coalesced; do { assert(extent_state_get(extent) == extent_state_active); extent = extent_try_coalesce_large(tsdn, arena, - r_extent_hooks, rtree_ctx, extents, extent, + r_extent_hooks, rtree_ctx, eset, extent, &coalesced, growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); arena_decay_extent(tsdn, arena, r_extent_hooks, extent); return; } } - extent_deactivate_locked(tsdn, arena, extents, extent); + extent_deactivate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); } void -- cgit v0.12 From e6180fe1b485c6128de4169e86c178f3118dcde4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:17:23 -0700 Subject: Eset: Add a source file. This will let us move extents_* functions over one by one. --- Makefile.in | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/eset.c | 4 ++++ 4 files changed, 7 insertions(+) create mode 100644 src/eset.c diff --git a/Makefile.in b/Makefile.in index 62ae71f..21a1053 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index b6b8339..a968338 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index f405ea3..72a57e5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/eset.c b/src/eset.c new file mode 100644 index 0000000..3b8d1cb --- /dev/null +++ b/src/eset.c @@ -0,0 +1,4 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/eset.h" -- cgit v0.12 From b416b96a397a2234d943d1e7e37e1dc208c971bc Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:37:15 -0700 Subject: Extents -> Eset: rename/move extents_init. --- include/jemalloc/internal/eset.h | 6 ++++++ include/jemalloc/internal/extent_externs.h | 2 -- src/arena.c | 8 +++----- src/eset.c | 21 +++++++++++++++++++++ src/extent.c | 21 --------------------- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 1c18f4e..55db75e 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -6,6 +6,9 @@ #include "jemalloc/internal/extent.h" #include "jemalloc/internal/mutex.h" +/* This is a transitional declarion, while we move extent.c into eset.c. */ +extern const bitmap_info_t eset_bitmap_info; + /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. 
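
For orientation, a minimal sketch of how an arena ends up wiring its three extent sets once eset_init() takes over from extents_init(); it is not part of the patch. The wrapper name arena_esets_init_sketch is hypothetical, and the calls simply mirror the arena_new() hunks further down in this patch: the dirty eset keeps delay_coalesce enabled because dirty extents are reused soon after deallocation and splitting/merging is not free, while the muzzy and retained esets coalesce eagerly.

	#include "jemalloc/internal/jemalloc_preamble.h"
	#include "jemalloc/internal/jemalloc_internal_includes.h"

	#include "jemalloc/internal/eset.h"

	/* Hypothetical wrapper; mirrors the arena_new() changes below. */
	static bool
	arena_esets_init_sketch(tsdn_t *tsdn, arena_t *arena) {
		/* Dirty extents: delay coalescing, they are reused soon. */
		if (eset_init(tsdn, &arena->extents_dirty, extent_state_dirty,
		    true)) {
			return true;
		}
		/* Muzzy extents: coalesce immediately, off the critical path. */
		if (eset_init(tsdn, &arena->extents_muzzy, extent_state_muzzy,
		    false)) {
			return true;
		}
		/* Retained extents: coalesce immediately as well. */
		return eset_init(tsdn, &arena->extents_retained,
		    extent_state_retained, false);
	}
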
@@ -57,4 +60,7 @@ struct eset_s { bool delay_coalesce; }; +bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, + bool delay_coalesce); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 45271d7..7a22384 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,8 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -bool extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce); extent_state_t extents_state_get(const eset_t *eset); size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. */ diff --git a/src/arena.c b/src/arena.c index 5380dee..1d269dc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2022,16 +2022,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (extents_init(tsdn, &arena->extents_dirty, extent_state_dirty, - true)) { + if (eset_init(tsdn, &arena->extents_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (extents_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, - false)) { + if (eset_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2040,7 +2038,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (extents_init(tsdn, &arena->extents_retained, extent_state_retained, + if (eset_init(tsdn, &arena->extents_retained, extent_state_retained, false)) { goto label_error; } diff --git a/src/eset.c b/src/eset.c index 3b8d1cb..09148d0 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,3 +2,24 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" + +const bitmap_info_t eset_bitmap_info = + BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); + +bool +eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, + bool delay_coalesce) { + if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, + malloc_mutex_rank_exclusive)) { + return true; + } + for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { + extent_heap_new(&eset->heaps[i]); + } + bitmap_init(eset->bitmap, &eset_bitmap_info, true); + extent_list_init(&eset->lru); + atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); + eset->state = state; + eset->delay_coalesce = delay_coalesce; + return false; +} diff --git a/src/extent.c b/src/extent.c index d535014..51a145d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,9 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static const bitmap_info_t eset_bitmap_info = - BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); - static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); @@ -308,24 +305,6 @@ extent_size_quantize_ceil(size_t size) { /* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -bool -extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce) { - if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { - return true; - } - for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&eset->heaps[i]); - } - bitmap_init(eset->bitmap, &eset_bitmap_info, true); - extent_list_init(&eset->lru); - atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); - eset->state = state; - eset->delay_coalesce = delay_coalesce; - return false; -} - extent_state_t extents_state_get(const eset_t *eset) { return eset->state; -- cgit v0.12 From 63d1b7a7a76b7294a7dd85599c24cd9b555ccf4e Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:45:16 -0700 Subject: Extents -> Eset: move extents_state_get. --- include/jemalloc/internal/eset.h | 1 + include/jemalloc/internal/extent_externs.h | 1 - src/arena.c | 2 +- src/eset.c | 5 +++++ src/extent.c | 21 ++++++++------------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 55db75e..abd37ca 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -62,5 +62,6 @@ struct eset_s { bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); +extent_state_t eset_state_get(const eset_t *eset); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 7a22384..2e196dd 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,7 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -extent_state_t extents_state_get(const eset_t *eset); size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. */ size_t extents_nextents_get(eset_t *eset, pszind_t ind); diff --git a/src/arena.c b/src/arena.c index 1d269dc..3eae7e3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -861,7 +861,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, size_t npages = extent_size_get(extent) >> LG_PAGE; npurged += npages; extent_list_remove(decay_extents, extent); - switch (extents_state_get(eset)) { + switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: diff --git a/src/eset.c b/src/eset.c index 09148d0..d0b5594 100644 --- a/src/eset.c +++ b/src/eset.c @@ -23,3 +23,8 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, eset->delay_coalesce = delay_coalesce; return false; } + +extent_state_t +eset_state_get(const eset_t *eset) { + return eset->state; +} diff --git a/src/extent.c b/src/extent.c index 51a145d..cdbf909 100644 --- a/src/extent.c +++ b/src/extent.c @@ -305,11 +305,6 @@ extent_size_quantize_ceil(size_t size) { /* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -extent_state_t -extents_state_get(const eset_t *eset) { - return eset->state; -} - size_t extents_npages_get(eset_t *eset) { return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); @@ -529,7 +524,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, bool coalesced; extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, eset, extent, &coalesced, false); - extent_state_set(extent, extents_state_get(eset)); + extent_state_set(extent, eset_state_get(eset)); if (!coalesced) { return true; @@ -613,7 +608,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (extents_state_get(eset)) { + switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -647,7 +642,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ - if (extents_state_get(eset) == extent_state_dirty) { + if (eset_state_get(eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, @@ -679,7 +674,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); - extent_state_set(extent, extents_state_get(eset)); + extent_state_set(extent, eset_state_get(eset)); extents_insert_locked(tsdn, eset, extent); } @@ -695,7 +690,7 @@ static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extents_state_get(eset)); + assert(extent_state_get(extent) == eset_state_get(eset)); extents_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); @@ -936,7 +931,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != - extents_state_get(eset)) { + eset_state_get(eset)) { extent = NULL; } extent_unlock(tsdn, unlock_extent); @@ -1700,8 +1695,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((extents_state_get(eset) != extent_state_dirty && - extents_state_get(eset) != extent_state_muzzy) || + assert((eset_state_get(eset) != extent_state_dirty && + eset_state_get(eset) != extent_state_muzzy) || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &eset->mtx); -- cgit v0.12 From 820f070c6b5b7ff44902ddb45b4b8894075a5c96 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 23:54:57 -0700 Subject: Move page quantization to sz module. 
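
As a hedged illustration of what the relocated helpers provide, the sketch below spells out the invariants checked by the updated test/unit/extent_quantize.c once the functions become sz_psz_quantize_floor() and sz_psz_quantize_ceil(): given a nonzero, page-aligned extent size within the supported large size range, floor rounds down and ceil rounds up to the nearest page size class boundary, and an already-quantized size maps to itself. The function name psz_quantize_demo is hypothetical and not part of the patch.

	#include "jemalloc/internal/jemalloc_preamble.h"
	#include "jemalloc/internal/jemalloc_internal_includes.h"

	/* Hypothetical demo of the quantization invariants. */
	static void
	psz_quantize_demo(size_t extent_size) {
		/* Preconditions asserted by both helpers. */
		assert(extent_size > 0 && (extent_size & PAGE_MASK) == 0);

		size_t floor = sz_psz_quantize_floor(extent_size);
		size_t ceil = sz_psz_quantize_ceil(extent_size);

		/* The input is bracketed by page size class boundaries. */
		assert(floor <= extent_size && extent_size <= ceil);
		/* Quantizing an already-quantized size is a no-op. */
		assert(sz_psz_quantize_floor(floor) == floor);
		assert(sz_psz_quantize_ceil(ceil) == ceil);
	}
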
--- include/jemalloc/internal/sz.h | 3 ++ src/extent.c | 63 ++++-------------------------------------- src/sz.c | 48 ++++++++++++++++++++++++++++++++ test/unit/extent_quantize.c | 16 +++++------ 4 files changed, 64 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 68e558a..6df541f 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -315,4 +315,7 @@ sz_sa2u(size_t size, size_t alignment) { return usize; } +size_t sz_psz_quantize_floor(size_t size); +size_t sz_psz_quantize_ceil(size_t size); + #endif /* JEMALLOC_INTERNAL_SIZE_H */ diff --git a/src/extent.c b/src/extent.c index cdbf909..af23ca2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -249,59 +249,6 @@ extent_hooks_assure_initialized(arena_t *arena, } } -#ifndef JEMALLOC_JET -static -#endif -size_t -extent_size_quantize_floor(size_t size) { - size_t ret; - pszind_t pind; - - assert(size > 0); - assert((size & PAGE_MASK) == 0); - - pind = sz_psz2ind(size - sz_large_pad + 1); - if (pind == 0) { - /* - * Avoid underflow. This short-circuit would also do the right - * thing for all sizes in the range for which there are - * PAGE-spaced size classes, but it's simplest to just handle - * the one case that would cause erroneous results. - */ - return size; - } - ret = sz_pind2sz(pind - 1) + sz_large_pad; - assert(ret <= size); - return ret; -} - -#ifndef JEMALLOC_JET -static -#endif -size_t -extent_size_quantize_ceil(size_t size) { - size_t ret; - - assert(size > 0); - assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); - - ret = extent_size_quantize_floor(size); - if (ret < size) { - /* - * Skip a quantization that may have an adequately large extent, - * because under-sized extents may be mixed in. This only - * happens when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + - sz_large_pad; - } - return ret; -} - /* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) @@ -342,7 +289,7 @@ extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); - size_t psz = extent_size_quantize_floor(size); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (extent_heap_empty(&eset->heaps[pind])) { bitmap_unset(eset->bitmap, &eset_bitmap_info, @@ -373,7 +320,7 @@ extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); - size_t psz = extent_size_quantize_floor(size); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); extent_heap_remove(&eset->heaps[pind], extent); @@ -405,8 +352,8 @@ extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { static extent_t * extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)pind); i < pind_max; i = @@ -444,7 +391,7 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t size) { extent_t *ret = NULL; - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (!maps_coalesce && !opt_retain) { /* diff --git a/src/sz.c b/src/sz.c index 8633fb0..7734f39 100644 --- a/src/sz.c +++ b/src/sz.c @@ -4,6 +4,54 @@ JEMALLOC_ALIGNED(CACHELINE) size_t sz_pind2sz_tab[SC_NPSIZES+1]; + +size_t +sz_psz_quantize_floor(size_t size) { + size_t ret; + pszind_t pind; + + assert(size > 0); + assert((size & PAGE_MASK) == 0); + + pind = sz_psz2ind(size - sz_large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ + return size; + } + ret = sz_pind2sz(pind - 1) + sz_large_pad; + assert(ret <= size); + return ret; +} + +size_t +sz_psz_quantize_ceil(size_t size) { + size_t ret; + + assert(size > 0); + assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); + + ret = sz_psz_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large extent, + * because under-sized extents may be mixed in. This only + * happens when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
+ */ + ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + + sz_large_pad; + } + return ret; +} + static void sz_boot_pind2sz_tab(const sc_data_t *sc_data) { int pind = 0; diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 0ca7a75..64b3baa 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -23,11 +23,11 @@ TEST_BEGIN(test_small_extent_size) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); assert_zu_eq(extent_size, - extent_size_quantize_floor(extent_size), + sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); assert_zu_eq(extent_size, - extent_size_quantize_ceil(extent_size), + sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); } @@ -65,8 +65,8 @@ TEST_BEGIN(test_large_extent_size) { &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); extent_size = cache_oblivious ? lextent_size + PAGE : lextent_size; - floor = extent_size_quantize_floor(extent_size); - ceil = extent_size_quantize_ceil(extent_size); + floor = sz_psz_quantize_floor(extent_size); + ceil = sz_psz_quantize_ceil(extent_size); assert_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " @@ -79,7 +79,7 @@ TEST_BEGIN(test_large_extent_size) { if (i > 0) { assert_zu_eq(extent_size_prev, - extent_size_quantize_floor(extent_size - PAGE), + sz_psz_quantize_floor(extent_size - PAGE), "Floor should be a precise size"); if (extent_size_prev < ceil_prev) { assert_zu_eq(ceil_prev, extent_size, @@ -91,7 +91,7 @@ TEST_BEGIN(test_large_extent_size) { } if (i + 1 < nlextents) { extent_size_prev = floor; - ceil_prev = extent_size_quantize_ceil(extent_size + + ceil_prev = sz_psz_quantize_ceil(extent_size + PAGE); } } @@ -109,8 +109,8 @@ TEST_BEGIN(test_monotonic) { size_t extent_size, floor, ceil; extent_size = i << LG_PAGE; - floor = extent_size_quantize_floor(extent_size); - ceil = extent_size_quantize_ceil(extent_size); + floor = sz_psz_quantize_floor(extent_size); + ceil = sz_psz_quantize_ceil(extent_size); assert_zu_le(floor, extent_size, "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", -- cgit v0.12 From a42861540e3a257259eb1c303c7750229ac62b71 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:52:13 -0700 Subject: Extents -> Eset: Convert some stats getters. --- include/jemalloc/internal/arena_structs.h | 4 ++-- include/jemalloc/internal/eset.h | 6 +++++ include/jemalloc/internal/extent_externs.h | 5 ---- src/arena.c | 37 +++++++++++++++--------------- src/background_thread.c | 6 ++--- src/eset.c | 15 ++++++++++++ src/extent.c | 15 ------------ 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 9563c3d..6e8b829 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -52,8 +52,8 @@ struct arena_decay_s { /* * Number of unpurged pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay_*.nunpurged and - * extents_npages_get(&arena->extents_*) to determine how many dirty - * pages, if any, were generated. + * eset_npages_get(&arena->extents_*) to determine how many dirty pages, + * if any, were generated. 
*/ size_t nunpurged; /* diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index abd37ca..1e05539 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -64,4 +64,10 @@ bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); extent_state_t eset_state_get(const eset_t *eset); +size_t eset_npages_get(eset_t *eset); +/* Get the number of extents in the given page size index. */ +size_t eset_nextents_get(eset_t *eset, pszind_t ind); +/* Get the sum total bytes of the extents in the given page size index. */ +size_t eset_nbytes_get(eset_t *eset, pszind_t ind); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 2e196dd..1c93027 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,11 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -size_t extents_npages_get(eset_t *eset); -/* Get the number of extents in the given page size index. */ -size_t extents_nextents_get(eset_t *eset, pszind_t ind); -/* Get the sum total bytes of the extents in the given page size index. */ -size_t extents_nbytes_get(eset_t *eset, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, diff --git a/src/arena.c b/src/arena.c index 3eae7e3..9dba4e7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -75,8 +75,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += extents_npages_get(&arena->extents_dirty); - *nmuzzy += extents_npages_get(&arena->extents_muzzy); + *ndirty += eset_npages_get(&arena->extents_dirty); + *nmuzzy += eset_npages_get(&arena->extents_muzzy); } void @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - extents_npages_get(&arena->extents_retained) << LG_PAGE); + eset_npages_get(&arena->extents_retained) << LG_PAGE); atomic_store_zu(&astats->extent_avail, atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), @@ -130,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - extents_npages_get(&arena->extents_dirty) + - extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + eset_npages_get(&arena->extents_dirty) + + eset_npages_get(&arena->extents_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -173,13 +173,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = extents_nextents_get(&arena->extents_dirty, i); - muzzy = extents_nextents_get(&arena->extents_muzzy, i); - retained = 
extents_nextents_get(&arena->extents_retained, i); - dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i); - muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i); - retained_bytes = - extents_nbytes_get(&arena->extents_retained, i); + dirty = eset_nextents_get(&arena->extents_dirty, i); + muzzy = eset_nextents_get(&arena->extents_muzzy, i); + retained = eset_nextents_get(&arena->extents_retained, i); + dirty_bytes = eset_nbytes_get(&arena->extents_dirty, i); + muzzy_bytes = eset_nbytes_get(&arena->extents_muzzy, i); + retained_bytes = eset_nbytes_get(&arena->extents_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -645,7 +644,7 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, eset_t *eset, const nstime_t *time, bool is_background_thread) { - size_t current_npages = extents_npages_get(eset); + size_t current_npages = eset_npages_get(eset); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -720,7 +719,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, eset, false, - 0, extents_npages_get(eset), + 0, eset_npages_get(eset), is_background_thread); } return false; @@ -760,7 +759,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, is_background_thread); } else if (is_background_thread) { arena_decay_try_purge(tsdn, arena, decay, eset, - extents_npages_get(eset), + eset_npages_get(eset), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -907,7 +906,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, /* * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * invariant: (eset_npages_get(extents) >= npages_limit). We need an upper * bound on number of pages in order to prevent unbounded growth (namely in * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. @@ -950,7 +949,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, - extents_npages_get(eset), is_background_thread); + eset_npages_get(eset), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1177,8 +1176,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(extents_npages_get(&arena->extents_dirty) == 0); - assert(extents_npages_get(&arena->extents_muzzy) == 0); + assert(eset_npages_get(&arena->extents_dirty) == 0); + assert(eset_npages_get(&arena->extents_muzzy) == 0); /* Deallocate retained memory. 
*/ arena_destroy_retained(tsd_tsdn(tsd), arena); diff --git a/src/background_thread.c b/src/background_thread.c index f4b9cef..9476a12 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = extents_npages_get(eset); + size_t npages = eset_npages_get(eset); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { @@ -718,8 +718,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (extents_npages_get(&arena->extents_dirty) > 0 || - extents_npages_get(&arena->extents_muzzy) > 0 || + (eset_npages_get(&arena->extents_dirty) > 0 || + eset_npages_get(&arena->extents_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/eset.c b/src/eset.c index d0b5594..d9457ee 100644 --- a/src/eset.c +++ b/src/eset.c @@ -28,3 +28,18 @@ extent_state_t eset_state_get(const eset_t *eset) { return eset->state; } + +size_t +eset_npages_get(eset_t *eset) { + return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); +} + +size_t +eset_nextents_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); +} + +size_t +eset_nbytes_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); +} diff --git a/src/extent.c b/src/extent.c index af23ca2..81ce308 100644 --- a/src/extent.c +++ b/src/extent.c @@ -252,21 +252,6 @@ extent_hooks_assure_initialized(arena_t *arena, /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -size_t -extents_npages_get(eset_t *eset) { - return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); -} - -size_t -extents_nextents_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); -} - -size_t -extents_nbytes_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); -} - static void extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); -- cgit v0.12 From 1210af9a4e26994c6f340085554f3519994ae682 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 23:51:13 -0700 Subject: Extent -> Eset: Move insertion and removal. --- include/jemalloc/internal/eset.h | 3 ++ src/eset.c | 78 +++++++++++++++++++++++++++++++++++ src/extent.c | 87 ++-------------------------------------- 3 files changed, 85 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 1e05539..400316e 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -70,4 +70,7 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. 
*/ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); +void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index d9457ee..21dccca 100644 --- a/src/eset.c +++ b/src/eset.c @@ -43,3 +43,81 @@ size_t eset_nbytes_get(eset_t *eset, pszind_t pind) { return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); } + +static void +eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); +} + +static void +eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); +} + +void +eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); + + size_t size = extent_size_get(extent); + size_t psz = sz_psz_quantize_floor(size); + pszind_t pind = sz_psz2ind(psz); + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_unset(eset->bitmap, &eset_bitmap_info, + (size_t)pind); + } + extent_heap_insert(&eset->heaps[pind], extent); + + if (config_stats) { + eset_stats_add(eset, pind, size); + } + + extent_list_append(&eset->lru, extent); + size_t npages = size >> LG_PAGE; + /* + * All modifications to npages hold the mutex (as asserted above), so we + * don't need an atomic fetch-add; we can get by with a load followed by + * a store. + */ + size_t cur_eset_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_eset_npages + npages, + ATOMIC_RELAXED); +} + +void +eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); + + size_t size = extent_size_get(extent); + size_t psz = sz_psz_quantize_floor(size); + pszind_t pind = sz_psz2ind(psz); + extent_heap_remove(&eset->heaps[pind], extent); + + if (config_stats) { + eset_stats_sub(eset, pind, size); + } + + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_set(eset->bitmap, &eset_bitmap_info, + (size_t)pind); + } + extent_list_remove(&eset->lru, extent); + size_t npages = size >> LG_PAGE; + /* + * As in eset_insert_locked, we hold eset->mtx and so don't need atomic + * operations for updating eset->npages. + */ + size_t cur_extents_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + assert(cur_extents_npages >= npages); + atomic_store_zu(&eset->npages, + cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); +} diff --git a/src/extent.c b/src/extent.c index 81ce308..069899c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -249,87 +249,8 @@ extent_hooks_assure_initialized(arena_t *arena, } } -/* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -static void -extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); -} - -static void -extents_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); -} - -static void -extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); - - size_t size = extent_size_get(extent); - size_t psz = sz_psz_quantize_floor(size); - pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&eset->heaps[pind])) { - bitmap_unset(eset->bitmap, &eset_bitmap_info, - (size_t)pind); - } - extent_heap_insert(&eset->heaps[pind], extent); - - if (config_stats) { - extents_stats_add(eset, pind, size); - } - - extent_list_append(&eset->lru, extent); - size_t npages = size >> LG_PAGE; - /* - * All modifications to npages hold the mutex (as asserted above), so we - * don't need an atomic fetch-add; we can get by with a load followed by - * a store. - */ - size_t cur_eset_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - atomic_store_zu(&eset->npages, cur_eset_npages + npages, - ATOMIC_RELAXED); -} - -static void -extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); - - size_t size = extent_size_get(extent); - size_t psz = sz_psz_quantize_floor(size); - pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&eset->heaps[pind], extent); - - if (config_stats) { - extents_stats_sub(eset, pind, size); - } - - if (extent_heap_empty(&eset->heaps[pind])) { - bitmap_set(eset->bitmap, &eset_bitmap_info, - (size_t)pind); - } - extent_list_remove(&eset->lru, extent); - size_t npages = size >> LG_PAGE; - /* - * As in extents_insert_locked, we hold eset->mtx and so don't need - * atomic operations for updating eset->npages. - */ - size_t cur_extents_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - assert(cur_extents_npages >= npages); - atomic_store_zu(&eset->npages, - cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); -} - /* * Find an extent with size [min_size, max_size) to satisfy the alignment * requirement. For each size, try only the first extent in the heap. 
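
A minimal sketch of the locking discipline around the relocated helpers, assuming the caller pattern of extent_deactivate()/extent_deactivate_locked() in src/extent.c; the wrapper name eset_park_extent_sketch is hypothetical. eset_insert_locked() and eset_remove_locked() assert that eset->mtx is owned and that the extent's state matches the eset's state, so callers set the state before inserting and only change it back after removing.

	#include "jemalloc/internal/jemalloc_preamble.h"
	#include "jemalloc/internal/jemalloc_internal_includes.h"

	#include "jemalloc/internal/eset.h"

	/* Hypothetical wrapper; mirrors extent_deactivate() in src/extent.c. */
	static void
	eset_park_extent_sketch(tsdn_t *tsdn, eset_t *eset, extent_t *extent) {
		malloc_mutex_lock(tsdn, &eset->mtx);
		/* Take on the eset's state (dirty/muzzy/retained) first... */
		extent_state_set(extent, eset_state_get(eset));
		/* ...then update the heaps, bitmap, LRU list and page count. */
		eset_insert_locked(tsdn, eset, extent);
		malloc_mutex_unlock(tsdn, &eset->mtx);
	}
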
@@ -461,7 +382,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, extent); return false; } @@ -521,7 +442,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = NULL; goto label_return; } - extents_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, extent); if (!eset->delay_coalesce) { break; } @@ -607,7 +528,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, eset_state_get(eset)); - extents_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, extent); } static void @@ -624,7 +545,7 @@ extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == eset_state_get(eset)); - extents_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); } -- cgit v0.12 From 77bbb35a92821858b9054aa88f2c3bc76b29cbdc Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 09:36:22 -0700 Subject: Extent -> Eset: Move extent fit functions. --- include/jemalloc/internal/eset.h | 6 ++ src/eset.c | 112 ++++++++++++++++++++++++++++++++++++ src/extent.c | 121 +-------------------------------------- 3 files changed, 119 insertions(+), 120 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 400316e..77a55e9 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -72,5 +72,11 @@ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +/* + * Select an extent from this eset of the given size and alignment. Returns + * null if no such item could be found. + */ +extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, + size_t alignment); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 21dccca..68ec7e4 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" +/* For opt_retain */ +#include "jemalloc/internal/extent_mmap.h" const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); @@ -121,3 +123,113 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } + +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. 
+ */ +static extent_t * +eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, + (size_t)i+1)) { + assert(i < SC_NPSIZES); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } + } + + return NULL; +} + +/* + * Do first-fit extent selection, i.e. select the oldest/lowest extent that is + * large enough. + */ +static extent_t * +eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { + extent_t *ret = NULL; + + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + + if (!maps_coalesce && !opt_retain) { + /* + * No split / merge allowed (Windows w/o retain). Try exact fit + * only. + */ + return extent_heap_empty(&eset->heaps[pind]) ? NULL : + extent_heap_first(&eset->heaps[pind]); + } + + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); + i < SC_NPSIZES + 1; + i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, + (size_t)i+1)) { + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); + assert(extent_size_get(extent) >= size); + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large eset for much smaller sizes. + * + * Only do check for dirty eset (delay_coalesce). + */ + if (eset->delay_coalesce && + (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + break; + } + if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + ret = extent; + } + if (i == SC_NPSIZES) { + break; + } + assert(i < SC_NPSIZES); + } + + return ret; +} + +extent_t * +eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = eset_first_fit_locked(tsdn, eset, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). + */ + extent = eset_fit_alignment(eset, esize, max_size, alignment); + } + + return extent; +} diff --git a/src/extent.c b/src/extent.c index 069899c..a5f0048 100644 --- a/src/extent.c +++ b/src/extent.c @@ -251,124 +251,6 @@ extent_hooks_assure_initialized(arena_t *arena, ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -/* - * Find an extent with size [min_size, max_size) to satisfy the alignment - * requirement. For each size, try only the first extent in the heap. 
- */ -static extent_t * -extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, - size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); - - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { - assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - uintptr_t base = (uintptr_t)extent_base_get(extent); - size_t candidate_size = extent_size_get(extent); - assert(candidate_size >= min_size); - - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); - if (base > next_align || base + candidate_size <= next_align) { - /* Overflow or not crossing the next alignment. */ - continue; - } - - size_t leadsize = next_align - base; - if (candidate_size - leadsize >= min_size) { - return extent; - } - } - - return NULL; -} - -/* - * Do first-fit extent selection, i.e. select the oldest/lowest extent that is - * large enough. - */ -static extent_t * -extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - size_t size) { - extent_t *ret = NULL; - - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - - if (!maps_coalesce && !opt_retain) { - /* - * No split / merge allowed (Windows w/o retain). Try exact fit - * only. - */ - return extent_heap_empty(&eset->heaps[pind]) ? NULL : - extent_heap_first(&eset->heaps[pind]); - } - - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); - i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - assert(extent_size_get(extent) >= size); - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large eset for much smaller sizes. - * - * Only do check for dirty eset (delay_coalesce). - */ - if (eset->delay_coalesce && - (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { - break; - } - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { - ret = extent; - } - if (i == SC_NPSIZES) { - break; - } - assert(i < SC_NPSIZES); - } - - return ret; -} - -/* - * Do first-fit extent selection, where the selection policy choice is - * based on eset->delay_coalesce. - */ -static extent_t * -extents_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - - size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (max_size < esize) { - return NULL; - } - - extent_t *extent = - extents_first_fit_locked(tsdn, arena, eset, max_size); - - if (alignment > PAGE && extent == NULL) { - /* - * max_size guarantees the alignment requirement but is rather - * pessimistic. Next we try to satisfy the aligned allocation - * with sizes in [esize, max_size). 
- */ - extent = extents_fit_alignment(eset, esize, max_size, - alignment); - } - - return extent; -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, @@ -790,8 +672,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, eset, esize, - alignment); + extent = eset_fit_locked(tsdn, eset, esize, alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &eset->mtx); -- cgit v0.12 From e144b21e4be9a6353ff9fee1b10c90e4b1030879 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:23:12 -0700 Subject: Extent -> Eset: Move fork handling. --- include/jemalloc/internal/eset.h | 4 ++++ include/jemalloc/internal/extent_externs.h | 3 --- src/arena.c | 18 +++++++++--------- src/eset.c | 15 +++++++++++++++ src/extent.c | 15 --------------- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 77a55e9..5b479d5 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -79,4 +79,8 @@ void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment); +void eset_prefork(tsdn_t *tsdn, eset_t *eset); +void eset_postfork_parent(tsdn_t *tsdn, eset_t *eset); +void eset_postfork_child(tsdn_t *tsdn, eset_t *eset); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 1c93027..cbfb2c7 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -35,9 +35,6 @@ void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); -void extents_prefork(tsdn_t *tsdn, eset_t *eset); -void extents_postfork_parent(tsdn_t *tsdn, eset_t *eset); -void extents_postfork_child(tsdn_t *tsdn, eset_t *eset); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 9dba4e7..f9d7dcd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2200,9 +2200,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - extents_prefork(tsdn, &arena->extents_dirty); - extents_prefork(tsdn, &arena->extents_muzzy); - extents_prefork(tsdn, &arena->extents_retained); + eset_prefork(tsdn, &arena->extents_dirty); + eset_prefork(tsdn, &arena->extents_muzzy); + eset_prefork(tsdn, &arena->extents_retained); } void @@ -2242,9 +2242,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); - extents_postfork_parent(tsdn, &arena->extents_dirty); - extents_postfork_parent(tsdn, &arena->extents_muzzy); - extents_postfork_parent(tsdn, &arena->extents_retained); + eset_postfork_parent(tsdn, &arena->extents_dirty); + eset_postfork_parent(tsdn, &arena->extents_muzzy); + eset_postfork_parent(tsdn, &arena->extents_retained); malloc_mutex_postfork_parent(tsdn, 
&arena->extent_grow_mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2288,9 +2288,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); - extents_postfork_child(tsdn, &arena->extents_dirty); - extents_postfork_child(tsdn, &arena->extents_muzzy); - extents_postfork_child(tsdn, &arena->extents_retained); + eset_postfork_child(tsdn, &arena->extents_dirty); + eset_postfork_child(tsdn, &arena->extents_muzzy); + eset_postfork_child(tsdn, &arena->extents_retained); malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); diff --git a/src/eset.c b/src/eset.c index 68ec7e4..9cc8cee 100644 --- a/src/eset.c +++ b/src/eset.c @@ -233,3 +233,18 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return extent; } + +void +eset_prefork(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_prefork(tsdn, &eset->mtx); +} + +void +eset_postfork_parent(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_parent(tsdn, &eset->mtx); +} + +void +eset_postfork_child(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_child(tsdn, &eset->mtx); +} diff --git a/src/extent.c b/src/extent.c index a5f0048..b66afdb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -388,21 +388,6 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks extent_dalloc(tsdn, arena, extent); } -void -extents_prefork(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_prefork(tsdn, &eset->mtx); -} - -void -extents_postfork_parent(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_parent(tsdn, &eset->mtx); -} - -void -extents_postfork_child(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_child(tsdn, &eset->mtx); -} - static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { -- cgit v0.12 From 821dd53a1d46f07cc8252bea4b229a77caa4ca83 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:35:47 -0700 Subject: Extent -> Eset: Rename arena members. --- include/jemalloc/internal/arena_structs.h | 6 +-- src/arena.c | 78 +++++++++++++++---------------- src/background_thread.c | 8 ++-- src/ctl.c | 6 +-- src/extent.c | 16 +++---- src/large.c | 4 +- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 6e8b829..54889dc 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -162,9 +162,9 @@ struct arena_s { * * Synchronization: internal. 
*/ - eset_t extents_dirty; - eset_t extents_muzzy; - eset_t extents_retained; + eset_t eset_dirty; + eset_t eset_muzzy; + eset_t eset_retained; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/arena.c b/src/arena.c index f9d7dcd..37f4b55 100644 --- a/src/arena.c +++ b/src/arena.c @@ -75,8 +75,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += eset_npages_get(&arena->extents_dirty); - *nmuzzy += eset_npages_get(&arena->extents_muzzy); + *ndirty += eset_npages_get(&arena->eset_dirty); + *nmuzzy += eset_npages_get(&arena->eset_muzzy); } void @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - eset_npages_get(&arena->extents_retained) << LG_PAGE); + eset_npages_get(&arena->eset_retained) << LG_PAGE); atomic_store_zu(&astats->extent_avail, atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), @@ -130,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - eset_npages_get(&arena->extents_dirty) + - eset_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + eset_npages_get(&arena->eset_dirty) + + eset_npages_get(&arena->eset_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -173,12 +173,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = eset_nextents_get(&arena->extents_dirty, i); - muzzy = eset_nextents_get(&arena->extents_muzzy, i); - retained = eset_nextents_get(&arena->extents_retained, i); - dirty_bytes = eset_nbytes_get(&arena->extents_dirty, i); - muzzy_bytes = eset_nbytes_get(&arena->extents_muzzy, i); - retained_bytes = eset_nbytes_get(&arena->extents_retained, i); + dirty = eset_nextents_get(&arena->eset_dirty, i); + muzzy = eset_nextents_get(&arena->eset_muzzy, i); + retained = eset_nextents_get(&arena->eset_retained, i); + dirty_bytes = eset_nbytes_get(&arena->eset_dirty, i); + muzzy_bytes = eset_nbytes_get(&arena->eset_muzzy, i); + retained_bytes = eset_nbytes_get(&arena->eset_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -225,11 +225,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_muzzy.mtx, arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_retained.mtx, arena_prof_mutex_extents_retained) READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -257,7 +257,7 @@ arena_extents_dirty_dalloc(tsdn_t 
*tsdn, arena_t *arena, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, + extents_dalloc(tsdn, arena, r_extent_hooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); @@ -435,11 +435,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t mapped_add; bool commit = true; extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, + &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, + &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; @@ -809,14 +809,14 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, decay_ms); + &arena->eset_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, decay_ms); + &arena->eset_muzzy, decay_ms); } static size_t @@ -869,7 +869,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { extents_dalloc(tsdn, arena, r_extent_hooks, - &arena->extents_muzzy, extent); + &arena->eset_muzzy, extent); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -982,14 +982,14 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, is_background_thread, all); + &arena->eset_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, is_background_thread, all); + &arena->eset_muzzy, is_background_thread, all); } void @@ -1160,7 +1160,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0)) != NULL) { + &arena->eset_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } @@ -1176,8 +1176,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(eset_npages_get(&arena->extents_dirty) == 0); - assert(eset_npages_get(&arena->extents_muzzy) == 0); + assert(eset_npages_get(&arena->eset_dirty) == 0); + assert(eset_npages_get(&arena->eset_muzzy) == 0); /* Deallocate retained memory. 
*/ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1235,11 +1235,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard bool zero = false; bool commit = true; extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, + &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, + &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } if (slab == NULL) { @@ -2021,14 +2021,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (eset_init(tsdn, &arena->extents_dirty, extent_state_dirty, true)) { + if (eset_init(tsdn, &arena->eset_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (eset_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, false)) { + if (eset_init(tsdn, &arena->eset_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2037,7 +2037,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (eset_init(tsdn, &arena->extents_retained, extent_state_retained, + if (eset_init(tsdn, &arena->eset_retained, extent_state_retained, false)) { goto label_error; } @@ -2200,9 +2200,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - eset_prefork(tsdn, &arena->extents_dirty); - eset_prefork(tsdn, &arena->extents_muzzy); - eset_prefork(tsdn, &arena->extents_retained); + eset_prefork(tsdn, &arena->eset_dirty); + eset_prefork(tsdn, &arena->eset_muzzy); + eset_prefork(tsdn, &arena->eset_retained); } void @@ -2242,9 +2242,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); - eset_postfork_parent(tsdn, &arena->extents_dirty); - eset_postfork_parent(tsdn, &arena->extents_muzzy); - eset_postfork_parent(tsdn, &arena->extents_retained); + eset_postfork_parent(tsdn, &arena->eset_dirty); + eset_postfork_parent(tsdn, &arena->eset_muzzy); + eset_postfork_parent(tsdn, &arena->eset_retained); malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2288,9 +2288,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); - eset_postfork_child(tsdn, &arena->extents_dirty); - eset_postfork_child(tsdn, &arena->extents_muzzy); - eset_postfork_child(tsdn, &arena->extents_retained); + eset_postfork_child(tsdn, &arena->eset_dirty); + eset_postfork_child(tsdn, &arena->eset_muzzy); + eset_postfork_child(tsdn, &arena->eset_retained); malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, 
&arena->decay_muzzy.mtx); diff --git a/src/background_thread.c b/src/background_thread.c index 9476a12..4a74edb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->extents_dirty); + &arena->eset_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->extents_muzzy); + &arena->eset_muzzy); return i1 < i2 ? i1 : i2; } @@ -718,8 +718,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (eset_npages_get(&arena->extents_dirty) > 0 || - eset_npages_get(&arena->extents_muzzy) > 0 || + (eset_npages_get(&arena->eset_dirty) > 0 || + eset_npages_get(&arena->eset_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 2be2f32..a29be19 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3001,9 +3001,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->extent_avail_mtx); - MUTEX_PROF_RESET(arena->extents_dirty.mtx); - MUTEX_PROF_RESET(arena->extents_muzzy.mtx); - MUTEX_PROF_RESET(arena->extents_retained.mtx); + MUTEX_PROF_RESET(arena->eset_dirty.mtx); + MUTEX_PROF_RESET(arena->eset_muzzy.mtx); + MUTEX_PROF_RESET(arena->eset_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index b66afdb..a015f9b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1097,11 +1097,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); + &arena->eset_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); + &arena->eset_retained, trail, true); } } else { /* @@ -1114,12 +1114,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, to_salvage, true); + &arena->eset_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, r_extent_hooks, - &arena->extents_retained, to_leak, true); + &arena->eset_retained, to_leak, true); } goto label_err; } @@ -1128,7 +1128,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), true)) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); + &arena->eset_retained, extent, true); goto label_err; } if (!extent_need_manual_zero(arena)) { @@ -1189,7 +1189,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, - &arena->extents_retained, new_addr, size, pad, alignment, slab, + &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ 
-1434,7 +1434,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->extents_dirty); + assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. */ bool coalesced; do { @@ -1577,7 +1577,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, + extent_record(tsdn, arena, r_extent_hooks, &arena->eset_retained, extent, false); } diff --git a/src/large.c b/src/large.c index a5c2f9a..40afa62 100644 --- a/src/large.c +++ b/src/large.c @@ -155,10 +155,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_t *trail; bool new_mapping; if ((trail = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, extent_past_get(extent), trailsize, 0, + &arena->eset_dirty, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, + &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; -- cgit v0.12 From ce5b128f1006cb8bde04b633bfc43a4881e76490 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:40:39 -0700 Subject: Remove the undefined extent_size_quantize declarations. --- include/jemalloc/internal/extent_externs.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index cbfb2c7..6963b47 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -19,11 +19,6 @@ extent_hooks_t *extent_hooks_get(arena_t *arena); extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); -#ifdef JEMALLOC_JET -size_t extent_size_quantize_floor(size_t size); -size_t extent_size_quantize_ceil(size_t size); -#endif - ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -- cgit v0.12 From c97d255752e3dd53dbfcb5c3fdf9d972da2b47f1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 11:01:39 -0700 Subject: Eset: Remove temporary declaration. --- include/jemalloc/internal/eset.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 5b479d5..fae64c8 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -6,9 +6,6 @@ #include "jemalloc/internal/extent.h" #include "jemalloc/internal/mutex.h" -/* This is a transitional declarion, while we move extent.c into eset.c. */ -extern const bitmap_info_t eset_bitmap_info; - /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. -- cgit v0.12 From 3d84bd57f4954a17059bd31330ec87d3c1876411 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 23 Sep 2019 18:05:57 -0700 Subject: Arena: Add helper function arena_get_from_extent. 
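
The helper simply gives a name to the arena lookup that several call sites
spelled out by hand.  A minimal before/after sketch, restating the call-site
change from the diff below (no new names beyond arena_get_from_extent itself):

	/* Before: each caller loaded the arena from the global array. */
	arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)],
	    ATOMIC_RELAXED);

	/* After: the same lookup, behind one static inline helper. */
	arena_t *arena = arena_get_from_extent(extent);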
--- include/jemalloc/internal/arena_inlines_b.h | 6 ++++++ src/arena.c | 9 +++------ src/ctl.c | 3 +-- src/large.c | 31 ++++++++++------------------- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 917a491..a6135ee 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,6 +8,12 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +static inline arena_t * +arena_get_from_extent(extent_t *extent) { + return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); +} + JEMALLOC_ALWAYS_INLINE bool arena_has_default_hooks(arena_t *arena) { return (extent_hooks_get(arena) == &extent_hooks_default); diff --git a/src/arena.c b/src/arena.c index 37f4b55..e096f3a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1565,8 +1565,7 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); @@ -1731,8 +1730,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); @@ -1768,8 +1766,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_t *arena = atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS diff --git a/src/ctl.c b/src/ctl.c index a29be19..6bd534a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2612,8 +2612,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, if (extent == NULL) goto label_return; - arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena = arena_get_from_extent(extent); if (arena == NULL) goto label_return; diff --git a/src/large.c b/src/large.c index 40afa62..13d8e56 100644 --- a/src/large.c +++ b/src/large.c @@ -94,8 +94,7 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); @@ -131,8 +130,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { static bool large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - oldusize; @@ -232,18 +230,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t 
usize_min, /* Attempt to expand the allocation in-place. */ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { - arena_decay_tick(tsdn, - atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize && large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } } @@ -253,17 +247,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. */ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } /* Attempt to shrink the allocation in-place. */ if (oldusize > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } } @@ -357,20 +348,18 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent, true); + large_dalloc_prep_impl(tsdn, arena_get_from_extent(extent), extent, + true); } void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent); + large_dalloc_finish_impl(tsdn, arena_get_from_extent(extent), extent); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); large_dalloc_prep_impl(tsdn, arena, extent, false); large_dalloc_finish_impl(tsdn, arena, extent); arena_decay_tick(tsdn, arena); -- cgit v0.12 From 1df9dd35154ca460facbd74f779a13dcece78dac Mon Sep 17 00:00:00 2001 From: Gareth Lloyd Date: Tue, 24 Sep 2019 16:09:07 +0100 Subject: Fix je_ prefix issue in test --- msvc/test_threads/test_threads.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp index 92e3162..6eed028 100644 --- a/msvc/test_threads/test_threads.cpp +++ b/msvc/test_threads/test_threads.cpp @@ -9,6 +9,7 @@ #include #include #include +#define JEMALLOC_NO_DEMANGLE #include using std::vector; -- cgit v0.12 From beb7c16e946d5a48ac6c3e7318aa24be4e787c0c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 27 Aug 2019 14:42:14 -0700 Subject: Guard prof_active reset by opt_prof Set `prof_active` to read-only when `opt_prof` is turned off. 
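
For callers, the intended mallctl behavior is roughly sketched below; this is
an illustration mirroring the new test in test/unit/mallctl.c, not a
definition of the interface, and the helper name check_prof_active_guard is
made up for the example:

	#include <errno.h>
	#include <stdbool.h>
	#include <jemalloc/jemalloc.h>

	/* Returns true iff prof.active behaves as expected with opt_prof off. */
	static bool
	check_prof_active_guard(void) {
		bool active = true;
		/* Enabling sampling while opt_prof is off now fails (ENOENT)... */
		int err_on = mallctl("prof.active", NULL, NULL, &active,
		    sizeof(active));
		active = false;
		/* ...while writing false, the forced value, still succeeds. */
		int err_off = mallctl("prof.active", NULL, NULL, &active,
		    sizeof(active));
		return err_on == ENOENT && err_off == 0;
	}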
--- include/jemalloc/internal/prof_inlines_a.h | 11 +++++++++++ src/ctl.c | 10 ++++++++-- src/prof.c | 3 +++ test/unit/mallctl.c | 27 +++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 471d985..6716d2f 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -71,8 +71,19 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, #endif } +JEMALLOC_ALWAYS_INLINE void +prof_active_assert() { + cassert(config_prof); + /* + * If opt_prof is off, then prof_active must always be off, regardless + * of whether prof_active_mtx is in effect or not. + */ + assert(opt_prof || !prof_active); +} + JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { + prof_active_assert(); /* * Even if opt_prof is true, sampling can be temporarily disabled by * setting prof_active to false. No locking is used when reading diff --git a/src/ctl.c b/src/ctl.c index 6bd534a..fd05c08 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2662,7 +2662,8 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, bool oldval; if (!config_prof) { - return ENOENT; + ret = ENOENT; + goto label_return; } if (newp != NULL) { @@ -2670,7 +2671,12 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); + bool val = *(bool *)newp; + if (!opt_prof && val) { + ret = ENOENT; + goto label_return; + } + oldval = prof_active_set(tsd_tsdn(tsd), val); } else { oldval = prof_active_get(tsd_tsdn(tsd)); } diff --git a/src/prof.c b/src/prof.c index 9ea4eda..e00151d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -788,6 +788,7 @@ bool prof_active_get(tsdn_t *tsdn) { bool prof_active_current; + prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; malloc_mutex_unlock(tsdn, &prof_active_mtx); @@ -798,10 +799,12 @@ bool prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; + prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; malloc_mutex_unlock(tsdn, &prof_active_mtx); + prof_active_assert(); return prof_active_old; } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3a75ac0..0e88f31 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -762,6 +762,32 @@ TEST_BEGIN(test_arenas_lookup) { } TEST_END +TEST_BEGIN(test_prof_active) { + /* + * If config_prof is off, then the test for prof_active in + * test_mallctl_opt was already enough. 
+	 */
+	test_skip_if(!config_prof);
+
+	bool active, old;
+	size_t len = sizeof(bool);
+
+	active = true;
+	assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT,
+	    "Setting prof_active to true should fail when opt_prof is off");
+	old = true;
+	assert_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT,
+	    "Setting prof_active to true should fail when opt_prof is off");
+	assert_true(old, "old value should not be touched when mallctl fails");
+	active = false;
+	assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0,
+	    "Setting prof_active to false should succeed when opt_prof is off");
+	assert_d_eq(mallctl("prof.active", &old, &len, &active, len), 0,
+	    "Setting prof_active to false should succeed when opt_prof is off");
+	assert_false(old, "prof_active should be false when opt_prof is off");
+}
+TEST_END
+
 TEST_BEGIN(test_stats_arenas) {
 #define TEST_STATS_ARENAS(t, name) do {					\
 	t name;								\
@@ -882,6 +908,7 @@ main(void) {
 	    test_arenas_lextent_constants,
 	    test_arenas_create,
 	    test_arenas_lookup,
+	    test_prof_active,
 	    test_stats_arenas,
 	    test_hooks,
 	    test_hooks_exhaustion);
--
cgit v0.12


From 66e07f986d77e0b16fd236bbe3518790717d1a4d Mon Sep 17 00:00:00 2001
From: Yinan Zhang
Date: Thu, 3 Oct 2019 13:01:12 -0700
Subject: Suppress tdata creation in reentrancy

This change suppresses tdata initialization and prof sample threshold
update in interrupting malloc calls.  Interrupting calls have no need
for tdata.  Delaying tdata creation aligns better with our lazy tdata
creation principle, and it also helps us gain control back from
interrupting calls more quickly and reduces any risk of delegating
tdata creation to an interrupting call.
---
 include/jemalloc/internal/prof_inlines_b.h | 10 ++++++----
 src/prof.c                                 | 17 +++++++++++++++--
 src/prof_data.c                            |  2 ++
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h
index 860dfbe..c750a25 100644
--- a/include/jemalloc/internal/prof_inlines_b.h
+++ b/include/jemalloc/internal/prof_inlines_b.h
@@ -22,6 +22,7 @@ prof_tdata_get(tsd_t *tsd, bool create) {
 
 	tdata = tsd_prof_tdata_get(tsd);
 	if (create) {
+		assert(tsd_reentrancy_level_get(tsd) == 0);
 		if (unlikely(tdata == NULL)) {
 			if (tsd_nominal(tsd)) {
 				tdata = prof_tdata_init(tsd);
@@ -109,7 +110,11 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 		return true;
 	}
 
-	bool booted = tsd_prof_tdata_get(tsd);
+	if (tsd_reentrancy_level_get(tsd) > 0) {
+		return true;
+	}
+
+	bool booted = prof_tdata_get(tsd, false);
 	tdata = prof_tdata_get(tsd, true);
 	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
 		tdata = NULL;
@@ -132,9 +137,6 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
 		return true;
 	}
 
-	if (tsd_reentrancy_level_get(tsd) > 0) {
-		return true;
-	}
 	/* Compute new sample threshold. */
 	if (update) {
 		prof_sample_threshold_update(tdata);
diff --git a/src/prof.c b/src/prof.c
index e00151d..a702cc2 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -127,10 +127,15 @@ prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
 
 void
 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
-	prof_tdata_t *tdata;
-
 	cassert(config_prof);
 
+	if (tsd_reentrancy_level_get(tsd) > 0) {
+		assert((uintptr_t)tctx == (uintptr_t)1U);
+		return;
+	}
+
+	prof_tdata_t *tdata;
+
 	if (updated) {
 		/*
 		 * Compute a new sample threshold.
This isn't very important in @@ -810,6 +815,8 @@ prof_active_set(tsdn_t *tsdn, bool active) { const char * prof_thread_name_get(tsd_t *tsd) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); @@ -821,6 +828,8 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; unsigned i; char *s; @@ -859,6 +868,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { bool prof_thread_active_get(tsd_t *tsd) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); @@ -870,6 +881,8 @@ prof_thread_active_get(tsd_t *tsd) { bool prof_thread_active_set(tsd_t *tsd, bool active) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); diff --git a/src/prof_data.c b/src/prof_data.c index bab8e5c..cd92ee6 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1199,6 +1199,8 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; cassert(config_prof); -- cgit v0.12 From 4094b7c03fb5e814f6f4c85ff7e93b3228dc4d29 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2019 10:21:46 -0700 Subject: Limit # of iters of test_bitmap_xfu. Otherwise the test is too slow for higher page sizes such as 64k. --- test/unit/bitmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index cafb203..182f2f6 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -403,9 +403,11 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { } TEST_BEGIN(test_bitmap_xfu) { - size_t nbits; + size_t nbits, nbits_max; - for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { + /* The test is O(n^2); large page sizes may slow down too much. */ + nbits_max = BITMAP_MAXBITS > 512 ? 
512 : BITMAP_MAXBITS; + for (nbits = 1; nbits <= nbits_max; nbits++) { bitmap_info_t binfo; bitmap_info_init(&binfo, nbits); test_bitmap_xfu_body(&binfo, nbits); -- cgit v0.12 From 4fbbc817c1130d3d6c066f132fb5a2b23803be89 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 15 Jul 2019 10:37:09 -0700 Subject: Simplify time setting and getting for prof log --- include/jemalloc/internal/arena_inlines_b.h | 6 ++---- include/jemalloc/internal/prof_inlines_b.h | 9 ++++----- src/prof.c | 2 +- src/prof_log.c | 3 +-- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a6135ee..7ac2f94 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -90,8 +90,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE nstime_t -arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, - alloc_ctx_t *alloc_ctx) { +arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -105,8 +104,7 @@ arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { +arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c750a25..6b10f5b 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -64,20 +64,19 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE nstime_t -prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx); + return arena_prof_alloc_time_get(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { +prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t); + arena_prof_alloc_time_set(tsdn, ptr, t); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/prof.c b/src/prof.c index a702cc2..fc0c7d8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -169,7 +169,7 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, * when free() is called. 
*/ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsdn, ptr, NULL, t); + prof_alloc_time_set(tsdn, ptr, t); malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; diff --git a/src/prof_log.c b/src/prof_log.c index af91af7..c95f29e 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -229,8 +229,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { log_tables_initialized = true; } - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr); nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); -- cgit v0.12 From 4fe50bc7d05083d822a34068bdd75e34f067e5e4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 17 Oct 2019 16:46:45 -0700 Subject: Fix amd64 MSVC warning --- include/jemalloc/internal/cache_bin.h | 7 ++-- include/jemalloc/internal/safety_check.h | 2 +- src/prof_log.c | 2 +- src/stats.c | 56 +++++++++++++++++++------------- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 5396c2d..74ebbf7 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -120,8 +120,8 @@ cache_bin_ncached_max_get(szind_t ind) { static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + - bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); + cache_bin_sz_t n = (cache_bin_sz_t)((tcache_bin_info[ind].stack_size + + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); @@ -158,7 +158,8 @@ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - - (bin->low_water_position - bin->full_position) / sizeof(void *); + (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / + sizeof(void *)); assert(low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 53339ac..ec4b336 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -3,7 +3,7 @@ void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. 
*/ -void safety_check_set_abort(void (*abort_fn)()); +void safety_check_set_abort(void (*abort_fn)(const char *)); JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { diff --git a/src/prof_log.c b/src/prof_log.c index c95f29e..73ca741 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -452,7 +452,7 @@ prof_emitter_write_cb(void *opaque, const char *to_write) { return; } #endif - arg->ret = write(arg->fd, (void *)to_write, bytes); + arg->ret = malloc_write_fd(arg->fd, to_write, bytes); } /* diff --git a/src/stats.c b/src/stats.c index cf75810..1718b61 100644 --- a/src/stats.c +++ b/src/stats.c @@ -118,7 +118,7 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ col = &col_##counter_type[k_##counter_type]; \ ++k_##counter_type; \ emitter_col_init(col, row); \ @@ -145,16 +145,20 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, \ + counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -175,16 +179,21 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ + if (!derived) { \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ - CTL_M2_GET(cmd, arena_ind, (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], \ + #counter); \ + CTL_M2_GET(cmd, arena_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -202,17 +211,20 @@ mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, 
human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #counter); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP -- cgit v0.12 From 05681e387a3202567ff95528dbc460e92e031a3c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 8 Oct 2019 11:33:55 -0700 Subject: Optimize cache_bin_alloc_easy for malloc fast path `tcache_bin_info` is not accessed on malloc fast path but the compiler reserves a register for it, as well as an additional register for `tcache_bin_info[ind].stack_size`. The optimization gets rid of the need for the two registers. --- include/jemalloc/internal/cache_bin.h | 44 ++++++++++++++++++++++++++++------- src/jemalloc.c | 2 +- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 74ebbf7..38b8e32 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -183,8 +183,11 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins_large = bins_large; } +#define INVALID_SZIND ((szind_t)(unsigned)-1) + JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, + const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -192,20 +195,30 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { void *ret = *(bin->cur_ptr.ptr++); /* * Check for both bin->ncached == 0 and ncached < low_water in a single - * branch. This also avoids accessing tcache_bin_info (which is on a - * separate cacheline / page) in the common case. + * branch. When adjust_low_water is true, this also avoids accessing + * tcache_bin_info (which is on a separate cacheline / page) in the + * common case. */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { - uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].stack_size; - if (unlikely(bin->cur_ptr.lowbits > empty_position)) { - /* Over-allocated; revert. */ + if (adjust_low_water) { + assert(ind != INVALID_SZIND); + uint32_t empty_position = bin->full_position + + tcache_bin_info[ind].stack_size; + if (unlikely(bin->cur_ptr.lowbits > empty_position)) { + /* Over-allocated; revert. 
*/ + bin->cur_ptr.ptr--; + assert(bin->cur_ptr.lowbits == empty_position); + *success = false; + return NULL; + } + bin->low_water_position = bin->cur_ptr.lowbits; + } else { + assert(ind == INVALID_SZIND); bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == empty_position); + assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; return NULL; } - bin->low_water_position = bin->cur_ptr.lowbits; } /* @@ -220,6 +233,19 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { return ret; } +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { + /* The szind parameter won't be used. */ + return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false); +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { + return cache_bin_alloc_easy_impl(bin, success, ind, true); +} + +#undef INVALID_SZIND + JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index fc7d289..7745e34 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2372,7 +2372,7 @@ je_malloc(size_t size) { cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); + void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { *tsd_thread_allocatedp_get(tsd) += usize; -- cgit v0.12 From 4786099a3ad11dbf4027f453b8c6de1c1e8777db Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 24 Oct 2019 13:16:09 -0700 Subject: Increase column width for global malloc/free rate --- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1718b61..2b744e1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -829,12 +829,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, COL(alloc_count_row, count_nmalloc, right, 16, title); col_count_nmalloc.str_val = "nmalloc"; - COL(alloc_count_row, count_nmalloc_ps, right, 8, title); + COL(alloc_count_row, count_nmalloc_ps, right, 10, title); col_count_nmalloc_ps.str_val = "(#/sec)"; COL(alloc_count_row, count_ndalloc, right, 16, title); col_count_ndalloc.str_val = "ndalloc"; - COL(alloc_count_row, count_ndalloc_ps, right, 8, title); + COL(alloc_count_row, count_ndalloc_ps, right, 10, title); col_count_ndalloc_ps.str_val = "(#/sec)"; COL(alloc_count_row, count_nrequests, right, 16, title); -- cgit v0.12 From bd6e28d6a3d0468e36d7da032966e0d786020bcc Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Oct 2019 09:24:42 -0700 Subject: Guard slabcur fetching in extent_util --- src/ctl.c | 3 ++- src/extent.c | 9 +++++++-- test/unit/extent_util.c | 6 ++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index fd05c08..206af4c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3199,7 +3199,8 @@ label_return: * otherwise their values are undefined. * * This API is mainly intended for small class allocations, where extents are - * used as slab. + * used as slab. Note that if the bin the extent belongs to is completely + * full, "(a)" will be NULL. * * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" * will be zero (if stats are enabled; otherwise undefined). 
The other three diff --git a/src/extent.c b/src/extent.c index a015f9b..4bb358d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -2124,7 +2124,12 @@ extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, } else { *bin_nfree = *bin_nregs = 0; } - *slabcur_addr = extent_addr_get(bin->slabcur); - assert(*slabcur_addr != NULL); + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c index 97e55f0..4de0b04 100644 --- a/test/unit/extent_util.c +++ b/test/unit/extent_util.c @@ -94,10 +94,8 @@ TEST_BEGIN(test_query) { "Extent region count exceeded size"); assert_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - assert_ptr_not_null(SLABCUR_READ(out), - "Current slab is null"); - assert_true(NFREE_READ(out) == 0 - || SLABCUR_READ(out) <= p, + assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) + != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { assert_zu_le(BIN_NFREE_READ(out), -- cgit v0.12 From ee961c23100ebbe1e6eb7390a03be5456bc8814c Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 21 Oct 2019 18:44:42 -0700 Subject: Merge realloc and rallocx pathways. --- src/jemalloc.c | 253 +++++++++++++++++---------------------------------------- 1 file changed, 76 insertions(+), 177 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7745e34..8dd81bd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2506,56 +2506,6 @@ je_calloc(size_t num, size_t size) { return ret; } -static void * -irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { - void *p; - - if (tctx == NULL) { - return NULL; - } - if (usize <= SC_SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, - SC_LARGE_MINCLASS, 0, false, hook_args); - if (p == NULL) { - return NULL; - } - arena_prof_promote(tsd_tsdn(tsd), p, usize); - } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, - hook_args); - } - - return p; -} - -JEMALLOC_ALWAYS_INLINE void * -irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { - void *p; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); - tctx = prof_alloc_prep(tsd, usize, prof_active, true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx, - hook_args); - } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, - hook_args); - } - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return NULL; - } - prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, - old_tctx); - - return p; -} - JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -2645,121 +2595,6 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { } } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t arg_size) { - void *ret; - tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - size_t old_usize = 0; - size_t 
size = arg_size; - - LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); - - if (unlikely(size == 0)) { - if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(ptr). */ - UTRACE(ptr, 0, 0); - tcache_t *tcache; - tsd_t *tsd = tsd_fetch(); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - - uintptr_t args[3] = {(uintptr_t)ptr, size}; - hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); - - ifree(tsd, ptr, tcache, true); - - LOG("core.realloc.exit", "result: %p", NULL); - return NULL; - } - size = 1; - } - - if (likely(ptr != NULL)) { - assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); - - check_entry_exit_locking(tsd_tsdn(tsd)); - - - hook_ralloc_args_t hook_args = {true, {(uintptr_t)ptr, - (uintptr_t)arg_size, 0, 0}}; - - alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); - assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); - usize = sz_s2u(size); - if (config_prof && opt_prof) { - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - ret = NULL; - } else { - ret = irealloc_prof(tsd, ptr, old_usize, usize, - &alloc_ctx, &hook_args); - } - } else { - ret = iralloc(tsd, ptr, old_usize, size, 0, false, - &hook_args); - } - tsdn = tsd_tsdn(tsd); - } else { - /* realloc(NULL, size) is equivalent to malloc(size). */ - static_opts_t sopts; - dynamic_opts_t dopts; - - static_opts_init(&sopts); - dynamic_opts_init(&dopts); - - sopts.null_out_result_on_error = true; - sopts.set_errno_on_error = true; - sopts.oom_string = - ": Error in realloc(): out of memory\n"; - - dopts.result = &ret; - dopts.num_items = 1; - dopts.item_size = size; - - imalloc(&sopts, &dopts); - if (sopts.slow) { - uintptr_t args[3] = {(uintptr_t)ptr, arg_size}; - hook_invoke_alloc(hook_alloc_realloc, ret, - (uintptr_t)ret, args); - } - - return ret; - } - - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (likely(ret != NULL)) { - tsd_t *tsd; - - assert(usize == isalloc(tsdn, ret)); - tsd = tsdn_tsd(tsdn); - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } - UTRACE(ptr, size, ret); - check_entry_exit_locking(tsdn); - - LOG("core.realloc.exit", "result: %p", ret); - return ret; -} - JEMALLOC_NOINLINE void free_default(void *ptr) { @@ -3201,10 +3036,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return p; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_rallocx(void *ptr, size_t size, int flags) { +JEMALLOC_ALWAYS_INLINE void * +do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { void *p; tsd_t *tsd; size_t usize; @@ -3214,10 +3047,6 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); - - assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -3252,8 +3081,8 @@ je_rallocx(void *ptr, size_t size, int flags) { old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); - hook_ralloc_args_t hook_args = {false, {(uintptr_t)ptr, size, 
flags, - 0}}; + hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, + flags, 0}}; if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); @@ -3281,7 +3110,6 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -3291,10 +3119,81 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_rallocx(void *ptr, size_t size, int flags) { + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + void *ret = do_rallocx(ptr, size, flags, false); + LOG("core.rallocx.exit", "result: %p", ret); + return ret; +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_realloc(void *ptr, size_t size) { + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + + if (likely(ptr != NULL && size != 0)) { + void *ret = do_rallocx(ptr, size, 0, true); + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } else if (ptr != NULL && size == 0) { + /* realloc(ptr, 0) is equivalent to free(ptr). */ + UTRACE(ptr, 0, 0); + tcache_t *tcache; + tsd_t *tsd = tsd_fetch(); + check_entry_exit_locking(tsd_tsdn(tsd)); + + if (tsd_reentrancy_level_get(tsd) == 0) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); + + ifree(tsd, ptr, tcache, true); + + check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.realloc.exit", "result: %p", NULL); + return NULL; + } else { + /* realloc(NULL, size) is equivalent to malloc(size). */ + void *ret; + + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = + ": Error in realloc(): out of memory\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + + imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_alloc(hook_alloc_realloc, ret, + (uintptr_t)ret, args); + } + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } +} + JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { -- cgit v0.12 From 9cfa8059475745c31c9c646144432174a2165ca4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 23 Sep 2019 17:56:19 -0700 Subject: Realloc: Make behavior of realloc(ptr, 0) configurable. 
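A minimal caller-side sketch, assuming only the three zero_realloc modes introduced
below and that the mode is chosen at process startup via MALLOC_CONF (e.g.
MALLOC_CONF=zero_realloc:free); it is not part of the patch itself:

#include <stdio.h>
#include <stdlib.h>

int
main(void) {
	void *p = malloc(42);
	if (p == NULL) {
		return 1;
	}
	void *q = realloc(p, 0);
	/*
	 * zero_realloc:strict -> q is a live minimal (size-zero) allocation
	 *                        and must eventually be passed to free().
	 * zero_realloc:free   -> q is NULL and p has already been freed.
	 * zero_realloc:abort  -> the process aborts before reaching here.
	 */
	printf("realloc(p, 0) returned %p\n", q);
	free(q); /* free(NULL) is a no-op, so this is safe in either mode. */
	return 0;
}

Portable programs cannot rely on any one of these behaviors and should avoid
calling realloc() with a size of zero altogether.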
--- Makefile.in | 5 +- doc/jemalloc.xml.in | 27 ++++++++ .../jemalloc/internal/jemalloc_internal_externs.h | 2 + .../jemalloc/internal/jemalloc_internal_types.h | 11 ++++ src/ctl.c | 6 +- src/jemalloc.c | 77 ++++++++++++++++++---- src/stats.c | 1 + test/unit/hook.c | 24 ++++--- test/unit/mallctl.c | 13 +++- test/unit/zero_realloc_abort.c | 26 ++++++++ test/unit/zero_realloc_abort.sh | 3 + test/unit/zero_realloc_free.c | 33 ++++++++++ test/unit/zero_realloc_free.sh | 3 + test/unit/zero_realloc_strict.c | 48 ++++++++++++++ test/unit/zero_realloc_strict.sh | 3 + 15 files changed, 256 insertions(+), 26 deletions(-) create mode 100644 test/unit/zero_realloc_abort.c create mode 100644 test/unit/zero_realloc_abort.sh create mode 100644 test/unit/zero_realloc_free.c create mode 100644 test/unit/zero_realloc_free.sh create mode 100644 test/unit/zero_realloc_strict.c create mode 100644 test/unit/zero_realloc_strict.sh diff --git a/Makefile.in b/Makefile.in index 21a1053..e4d2180 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,7 +232,10 @@ TESTS_UNIT := \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/witness.c \ - $(srcroot)test/unit/zero.c + $(srcroot)test/unit/zero.c \ + $(srcroot)test/unit/zero_realloc_abort.c \ + $(srcroot)test/unit/zero_realloc_free.c \ + $(srcroot)test/unit/zero_realloc_strict.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ $(srcroot)test/unit/arena_reset_prof.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e83bfbf..746c6bd 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1489,6 +1489,33 @@ malloc_conf = "xmalloc:true";]]> by default. + + + opt.zero_realloc + (const char *) + r- + + Determines the behavior of + realloc() when passed a value of zero for the new + size. strict treats this as an allocation of size zero + (and returns a non-null result except in case of resource exhaustion). + free treats this as a deallocation of the pointer, and + returns NULL without setting + errno. abort aborts the process if + zero is passed. The default is strict. + + There is considerable divergence of behaviors across + implementations in handling this case. Many have the behavior of + free. This can introduce security vulnerabilities, since + a NULL return value indicates failure, and the + continued validity of the passed-in pointer (per POSIX and C11). + strict is safe, but can cause leaks in programs that + expect the common behavior. Programs intended to be portable and + leak-free cannot assume either behavior, and must therefore never call + realloc with a size of 0. The abort option enables these + testing this behavior. + + thread.arena diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index d291170..dae77b4 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -18,6 +18,8 @@ extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; extern unsigned opt_narenas; +extern zero_realloc_action_t opt_zero_realloc_action; +extern const char *zero_realloc_mode_names[]; /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index e296c5a..324a4b1 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -12,6 +12,17 @@ typedef unsigned szind_t; /* Processor / core id type. 
*/ typedef int malloc_cpuid_t; +/* When realloc(non-null-ptr, 0) is called, what happens? */ +enum zero_realloc_action_e { + /* Realloc(ptr, 0) is free(ptr); return malloc(0); */ + zero_realloc_action_strict = 0, + /* Realloc(ptr, 0) is free(ptr); */ + zero_realloc_action_free = 1, + /* Realloc(ptr, 0) aborts. */ + zero_realloc_action_abort = 2 +}; +typedef enum zero_realloc_action_e zero_realloc_action_t; + /* * Flags bits: * diff --git a/src/ctl.c b/src/ctl.c index 206af4c..b51207f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -112,6 +112,7 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) @@ -339,7 +340,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)} + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; static const ctl_named_node_t tcache_node[] = { @@ -1793,6 +1795,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_GEN(opt_zero_realloc, + zero_realloc_mode_names[opt_zero_realloc_action], const char *) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 8dd81bd..35a9e7b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -67,6 +67,15 @@ bool opt_junk_free = #endif ; +zero_realloc_action_t opt_zero_realloc_action = + zero_realloc_action_strict; + +const char *zero_realloc_mode_names[] = { + "strict", + "free", + "abort", +}; + bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; @@ -1411,6 +1420,22 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + if (CONF_MATCH("zero_realloc")) { + if (CONF_MATCH_VALUE("strict")) { + opt_zero_realloc_action + = zero_realloc_action_strict; + } else if (CONF_MATCH_VALUE("free")) { + opt_zero_realloc_action + = zero_realloc_action_free; + } else if (CONF_MATCH_VALUE("abort")) { + opt_zero_realloc_action + = zero_realloc_action_abort; + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE @@ -3133,18 +3158,17 @@ je_rallocx(void *ptr, size_t size, int flags) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { - LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); - - if (likely(ptr != NULL && size != 0)) { - void *ret = do_rallocx(ptr, size, 0, true); - LOG("core.realloc.exit", "result: %p", ret); - return ret; - } else if (ptr != NULL && size == 0) { - /* realloc(ptr, 0) is equivalent to free(ptr). */ +static void * +do_realloc_nonnull_zero(void *ptr) { + if (opt_zero_realloc_action == zero_realloc_action_strict) { + /* + * The user might have gotten a strict setting while expecting a + * free setting. If that's the case, we at least try to + * reduce the harm, and turn off the tcache while allocating, so + * that we'll get a true first fit. 
+ */ + return do_rallocx(ptr, 1, MALLOCX_TCACHE_NONE, true); + } else if (opt_zero_realloc_action == zero_realloc_action_free) { UTRACE(ptr, 0, 0); tcache_t *tcache; tsd_t *tsd = tsd_fetch(); @@ -3156,15 +3180,40 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } - uintptr_t args[3] = {(uintptr_t)ptr, size}; + uintptr_t args[3] = {(uintptr_t)ptr, 0}; hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); ifree(tsd, ptr, tcache, true); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.realloc.exit", "result: %p", NULL); return NULL; } else { + safety_check_fail("Called realloc(non-null-ptr, 0) with " + "zero_realloc:abort set\n"); + /* In real code, this will never run; the safety check failure + * will call abort. In the unit test, we just want to bail out + * without corrupting internal state that the test needs to + * finish. + */ + return NULL; + } +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_realloc(void *ptr, size_t size) { + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + + if (likely(ptr != NULL && size != 0)) { + void *ret = do_rallocx(ptr, size, 0, true); + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } else if (ptr != NULL && size == 0) { + void *ret = do_realloc_nonnull_zero(ptr); + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } else { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret; diff --git a/src/stats.c b/src/stats.c index 2b744e1..c9bab4f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1109,6 +1109,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_leak") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_CHAR_P("zero_realloc") emitter_dict_end(emitter); diff --git a/test/unit/hook.c b/test/unit/hook.c index 72fcc43..36dcb89 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -428,15 +428,21 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { free(ptr); /* realloc(ptr, 0) as free */ - ptr = malloc(1); - reset(); - realloc(ptr, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); + if (opt_zero_realloc_action == zero_realloc_action_free) { + ptr = malloc(1); + reset(); + realloc(ptr, 0); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_realloc, + "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, + "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], + "Wrong raw arg"); + assert_u64_eq((uintptr_t)0, arg_args_raw[1], + "Wrong raw arg"); + } /* realloc(NULL, 0) as malloc(0) */ reset(); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 0e88f31..4c0830f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -880,6 +880,16 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END +TEST_BEGIN(test_zero_realloc) { + const char *val; + size_t sz = sizeof(val); + int err = mallctl("opt.zero_realloc", &val, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl result"); + assert_str_eq(val, "strict", + "Unexpected default zero_realloc_beahvior"); +} +TEST_END + int main(void) { return test( @@ -911,5 +921,6 @@ main(void) { test_prof_active, 
test_stats_arenas, test_hooks, - test_hooks_exhaustion); + test_hooks_exhaustion, + test_zero_realloc); } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c new file mode 100644 index 0000000..2f49392 --- /dev/null +++ b/test/unit/zero_realloc_abort.c @@ -0,0 +1,26 @@ +#include "test/jemalloc_test.h" + +#include + +static bool abort_called = false; + +void set_abort_called() { + abort_called = true; +}; + +TEST_BEGIN(test_realloc_abort) { + abort_called = false; + safety_check_set_abort(&set_abort_called); + void *ptr = mallocx(42, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + ptr = realloc(ptr, 0); + assert_true(abort_called, "Realloc with zero size didn't abort"); +} +TEST_END + +int +main(void) { + return test( + test_realloc_abort); +} + diff --git a/test/unit/zero_realloc_abort.sh b/test/unit/zero_realloc_abort.sh new file mode 100644 index 0000000..37daeea --- /dev/null +++ b/test/unit/zero_realloc_abort.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:abort" diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c new file mode 100644 index 0000000..a073688 --- /dev/null +++ b/test/unit/zero_realloc_free.c @@ -0,0 +1,33 @@ +#include "test/jemalloc_test.h" + +static uint64_t +deallocated() { + if (!config_stats) { + return 0; + } + uint64_t deallocated; + size_t sz = sizeof(deallocated); + assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return deallocated; +} + +TEST_BEGIN(test_realloc_free) { + void *ptr = mallocx(42, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + uint64_t deallocated_before = deallocated(); + ptr = realloc(ptr, 0); + uint64_t deallocated_after = deallocated(); + assert_ptr_null(ptr, "Realloc didn't free"); + if (config_stats) { + assert_u64_gt(deallocated_after, deallocated_before, + "Realloc didn't free"); + } +} +TEST_END + +int +main(void) { + return test( + test_realloc_free); +} diff --git a/test/unit/zero_realloc_free.sh b/test/unit/zero_realloc_free.sh new file mode 100644 index 0000000..51b01c9 --- /dev/null +++ b/test/unit/zero_realloc_free.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:free" diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_strict.c new file mode 100644 index 0000000..b709951 --- /dev/null +++ b/test/unit/zero_realloc_strict.c @@ -0,0 +1,48 @@ +#include "test/jemalloc_test.h" + +static uint64_t +allocated() { + if (!config_stats) { + return 0; + } + uint64_t allocated; + size_t sz = sizeof(allocated); + assert_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + return allocated; +} + +static uint64_t +deallocated() { + if (!config_stats) { + return 0; + } + uint64_t deallocated; + size_t sz = sizeof(deallocated); + assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return deallocated; +} + +TEST_BEGIN(test_realloc_strict) { + void *ptr = mallocx(1, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + uint64_t allocated_before = allocated(); + uint64_t deallocated_before = deallocated(); + ptr = realloc(ptr, 0); + uint64_t allocated_after = allocated(); + uint64_t deallocated_after = deallocated(); + if (config_stats) { + assert_u64_lt(allocated_before, allocated_after, + "Unexpected stats change"); + assert_u64_lt(deallocated_before, deallocated_after, + "Unexpected stats change"); + } + dallocx(ptr, 0); +} 
+TEST_END +int +main(void) { + return test( + test_realloc_strict); +} diff --git a/test/unit/zero_realloc_strict.sh b/test/unit/zero_realloc_strict.sh new file mode 100644 index 0000000..314dcd0 --- /dev/null +++ b/test/unit/zero_realloc_strict.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:strict" -- cgit v0.12 From de81a4eadabb85b4c911fc6301b69f093ad47b53 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 26 Oct 2019 11:04:46 -0700 Subject: Add stats counters for number of zero reallocs --- Makefile.in | 3 +- doc/jemalloc.xml.in | 15 ++++++++ .../jemalloc/internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 7 +++- src/jemalloc.c | 5 +++ src/stats.c | 9 +++++ test/unit/zero_reallocs.c | 40 ++++++++++++++++++++++ test/unit/zero_reallocs.sh | 3 ++ 8 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 test/unit/zero_reallocs.c create mode 100644 test/unit/zero_reallocs.sh diff --git a/Makefile.in b/Makefile.in index e4d2180..fede961 100644 --- a/Makefile.in +++ b/Makefile.in @@ -235,7 +235,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ $(srcroot)test/unit/zero_realloc_free.c \ - $(srcroot)test/unit/zero_realloc_strict.c + $(srcroot)test/unit/zero_realloc_strict.c \ + $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ $(srcroot)test/unit/arena_reset_prof.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 746c6bd..77afb00 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2451,6 +2451,21 @@ struct extent_hooks_s { + + + stats.zero_reallocs + (size_t) + r- + [] + + Number of times that the realloc() + was called with a non-NULL pointer argument and a + 0 size argument. This is a fundamentally unsafe + pattern in portable programs; see + opt.zero_realloc for details. + + + stats.background_thread.num_threads diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index dae77b4..e9dbde8 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -20,6 +20,7 @@ extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; extern const char *zero_realloc_mode_names[]; +extern atomic_zu_t zero_realloc_count; /* Number of CPUs. 
*/ extern unsigned ncpus; diff --git a/src/ctl.c b/src/ctl.c index b51207f..abb82b5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -224,6 +224,7 @@ CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_utilization_query) @@ -593,7 +594,8 @@ static const ctl_named_node_t stats_node[] = { {NAME("background_thread"), CHILD(named, stats_background_thread)}, {NAME("mutexes"), CHILD(named, stats_mutexes)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)} + {NAME("arenas"), CHILD(indexed, stats_arenas)}, + {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, }; static const ctl_named_node_t experimental_hooks_node[] = { @@ -2841,6 +2843,9 @@ CTL_RO_CGEN(config_stats, stats_background_thread_num_runs, CTL_RO_CGEN(config_stats, stats_background_thread_run_interval, nstime_ns(&ctl_stats->background_thread.run_interval), uint64_t) +CTL_RO_CGEN(config_stats, stats_zero_reallocs, + atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) + CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 35a9e7b..88064df 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,6 +70,8 @@ bool opt_junk_free = zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; +atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); + const char *zero_realloc_mode_names[] = { "strict", "free", @@ -3160,6 +3162,9 @@ je_rallocx(void *ptr, size_t size, int flags) { static void * do_realloc_nonnull_zero(void *ptr) { + if (config_stats) { + atomic_fetch_add_zu(&zero_realloc_count, 1, ATOMIC_RELAXED); + } if (opt_zero_realloc_action == zero_realloc_action_strict) { /* * The user might have gotten a strict setting while expecting a diff --git a/src/stats.c b/src/stats.c index c9bab4f..41b990e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1252,6 +1252,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t allocated, active, metadata, metadata_thp, resident, mapped, retained; size_t num_background_threads; + size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); @@ -1262,6 +1263,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); + CTL_GET("stats.zero_reallocs", &zero_reallocs, size_t); + if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -1285,12 +1288,18 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "resident", emitter_type_size, &resident); emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + emitter_json_kv(emitter, "zero_reallocs", emitter_type_size, + &zero_reallocs); emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " "retained: %zu\n", allocated, active, metadata, metadata_thp, resident, mapped, retained); + /* Strange behaviors */ + emitter_table_printf(emitter, + "Count of realloc(non-null-ptr, 0) calls: %zu\n", zero_reallocs); + /* Background thread stats. 
*/ emitter_json_object_kv_begin(emitter, "background_thread"); emitter_json_kv(emitter, "num_threads", emitter_type_size, diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c new file mode 100644 index 0000000..fd33aaf --- /dev/null +++ b/test/unit/zero_reallocs.c @@ -0,0 +1,40 @@ +#include "test/jemalloc_test.h" + +static size_t +zero_reallocs() { + if (!config_stats) { + return 0; + } + size_t count = 12345; + size_t sz = sizeof(count); + + assert_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return count; +} + +TEST_BEGIN(test_zero_reallocs) { + test_skip_if(!config_stats); + + for (size_t i = 0; i < 100; ++i) { + void *ptr = mallocx(i * i + 1, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + size_t count = zero_reallocs(); + assert_zu_eq(i, count, "Incorrect zero realloc count"); + ptr = realloc(ptr, 0); + assert_ptr_null(ptr, "Realloc didn't free"); + count = zero_reallocs(); + assert_zu_eq(i + 1, count, "Realloc didn't adjust count"); + } +} +TEST_END + +int +main(void) { + /* + * We expect explicit counts; reentrant tests run multiple times, so + * counts leak across runs. + */ + return test_no_reentrancy( + test_zero_reallocs); +} diff --git a/test/unit/zero_reallocs.sh b/test/unit/zero_reallocs.sh new file mode 100644 index 0000000..51b01c9 --- /dev/null +++ b/test/unit/zero_reallocs.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:free" -- cgit v0.12 From 6924f83cb21f75e1c892d8f469500e12f1a3f5a7 Mon Sep 17 00:00:00 2001 From: RingsC Date: Sun, 13 Oct 2019 23:11:23 +0800 Subject: use SYS_openat when available

Some architectures, such as AArch64, may not provide the open syscall but do provide openat. In init_thp_state(), check for SYS_openat and fall back to it when SYS_open is not available.
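An illustrative sketch of the fallback (the helper name thp_state_fd is made up
for this example, and the JEMALLOC_USE_SYSCALL guard used in the real code is
omitted for brevity); with an absolute path, openat(AT_FDCWD, ...) resolves the
file exactly as open() would:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int
thp_state_fd(void) {
	const char *path = "/sys/kernel/mm/transparent_hugepage/enabled";
#if defined(SYS_open)
	/* Issue the open syscall directly when the kernel ABI defines it. */
	return (int)syscall(SYS_open, path, O_RDONLY);
#elif defined(SYS_openat)
	/* e.g. AArch64 defines only openat; AT_FDCWD keeps open() semantics. */
	return (int)syscall(SYS_openat, AT_FDCWD, path, O_RDONLY);
#else
	return open(path, O_RDONLY);
#endif
}

int
main(void) {
	int fd = thp_state_fd();
	if (fd >= 0) {
		close(fd);
	}
	return fd >= 0 ? 0 : 1;
}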
--- src/pages.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pages.c b/src/pages.c index 13de27a..75c8dd9 100644 --- a/src/pages.c +++ b/src/pages.c @@ -563,6 +563,9 @@ init_thp_state(void) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) int fd = (int)syscall(SYS_open, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) + int fd = (int)syscall(SYS_openat, + AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); #else int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); #endif -- cgit v0.12 From 152c0ef954f19fc2bbe53fead9c62c9824f06109 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 3 Sep 2019 15:04:48 -0700 Subject: Build a general purpose thread event handler --- Makefile.in | 10 +- include/jemalloc/internal/prof_inlines_b.h | 62 ++--- include/jemalloc/internal/thread_event.h | 139 +++++++++++ include/jemalloc/internal/tsd.h | 76 +++--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 27 ++- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 31 +-- src/jemalloc.c | 78 +++---- src/prof.c | 21 +- src/thread_event.c | 255 +++++++++++++++++++++ test/unit/thread_event.c | 57 +++++ test/unit/thread_event.sh | 5 + 13 files changed, 629 insertions(+), 138 deletions(-) create mode 100644 include/jemalloc/internal/thread_event.h create mode 100644 src/thread_event.c create mode 100644 test/unit/thread_event.c create mode 100644 test/unit/thread_event.sh diff --git a/Makefile.in b/Makefile.in index fede961..7eba774 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,11 +123,12 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ - $(srcroot)src/stats.c \ $(srcroot)src/sc.c \ + $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ + $(srcroot)src/thread_event.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c @@ -176,9 +177,9 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ + $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ - $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ @@ -200,6 +201,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ @@ -218,9 +220,9 @@ TESTS_UNIT := \ $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/safety_check.c \ + $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ @@ -228,8 +230,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/test_hooks.c \ + $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c \ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 6b10f5b..b4e65c0 100644 --- 
a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { @@ -80,24 +81,6 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_check(tsd_t *tsd, size_t usize, bool update) { - ssize_t check = update ? 0 : usize; - - int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); - if (update) { - bytes_until_sample -= usize; - if (tsd_nominal(tsd)) { - tsd_bytes_until_sample_set(tsd, bytes_until_sample); - } - } - if (likely(bytes_until_sample >= check)) { - return true; - } - - return false; -} - -JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; @@ -105,7 +88,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(prof_sample_check(tsd, usize, update))) { + if (likely(prof_sample_event_wait_get(tsd) > 0)) { return true; } @@ -127,13 +110,40 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - /* - * If this was the first creation of tdata, then - * prof_tdata_get() reset bytes_until_sample, so decrement and - * check it again - */ - if (!booted && prof_sample_check(tsd, usize, update)) { - return true; + if (!booted) { + /* + * If this was the first creation of tdata, then it means that + * the previous thread_event() relied on the wrong prof_sample + * wait time, and that it should have relied on the new + * prof_sample wait time just set by prof_tdata_get(), so we + * now manually check again. + * + * If the check fails, then even though we relied on the wrong + * prof_sample wait time, we're now actually in perfect shape, + * in the sense that we can pretend that we have used the right + * prof_sample wait time. + * + * If the check succeeds, then we are now in a tougher + * situation, in the sense that we cannot pretend that we have + * used the right prof_sample wait time. A straightforward + * solution would be to fully roll back thread_event(), set the + * right prof_sample wait time, and then redo thread_event(). + * A simpler way, which is implemented below, is to just set a + * new prof_sample wait time that is usize less, and do nothing + * else. Strictly speaking, the thread event handler may end + * up in a wrong state, since it has still recorded an event + * whereas in reality there may be no event. However, the + * difference in the wait time offsets the wrongly recorded + * event, so that, functionally, the countdown to the next + * event will behave exactly as if we have used the right + * prof_sample wait time in the first place. + */ + uint64_t wait = prof_sample_event_wait_get(tsd); + assert(wait > 0); + if (usize < wait) { + thread_prof_sample_event_update(tsd, wait - usize); + return true; + } } /* Compute new sample threshold. 
*/ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h new file mode 100644 index 0000000..08678b7 --- /dev/null +++ b/include/jemalloc/internal/thread_event.h @@ -0,0 +1,139 @@ +#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H +#define JEMALLOC_INTERNAL_THREAD_EVENT_H + +#include "jemalloc/internal/tsd.h" + +/* + * Maximum threshold on thread_allocated_next_event_fast, so that there is no + * need to check overflow in malloc fast path. (The allocation size in malloc + * fast path never exceeds SC_LOOKUP_MAXCLASS.) + */ +#define THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX \ + (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) + +/* + * The max interval helps make sure that malloc stays on the fast path in the + * common case, i.e. thread_allocated < thread_allocated_next_event_fast. + * When thread_allocated is within an event's distance to + * THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX above, thread_allocated_next_event_fast + * is wrapped around and we fall back to the medium-fast path. The max interval + * makes sure that we're not staying on the fallback case for too long, even if + * there's no active event or if all active events have long wait times. + */ +#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) + +void thread_event_assert_invariants_debug(tsd_t *tsd); +void thread_event_trigger(tsd_t *tsd, bool delay_event); +void thread_event_rollback(tsd_t *tsd, size_t diff); +void thread_event_update(tsd_t *tsd); +void thread_event_boot(); + +/* + * List of all events, in the following format: + * E(event, (condition)) + */ +#define ITERATE_OVER_ALL_EVENTS \ + E(prof_sample, (config_prof && opt_prof)) + +#define E(event, condition) \ + C(event##_event_wait) + +/* List of all thread event counters. */ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_next_event_fast) \ + C(thread_allocated_last_event) \ + C(thread_allocated_next_event) \ + ITERATE_OVER_ALL_EVENTS + +/* Getters directly wrap TSD getters. */ +#define C(counter) \ +JEMALLOC_ALWAYS_INLINE uint64_t \ +counter##_get(tsd_t *tsd) { \ + return tsd_##counter##_get(tsd); \ +} + +ITERATE_OVER_ALL_COUNTERS +#undef C + +/* + * Setters call the TSD pointer getters rather than the TSD setters, so that + * the counters can be modified even when TSD state is reincarnated or + * minimal_initialized: if an event is triggered in such cases, we will + * temporarily delay the event and let it be immediately triggered at the next + * allocation call. + */ +#define C(counter) \ +JEMALLOC_ALWAYS_INLINE void \ +counter##_set(tsd_t *tsd, uint64_t v) { \ + *tsd_##counter##p_get(tsd) = v; \ +} + +ITERATE_OVER_ALL_COUNTERS +#undef C + +/* + * For generating _event_wait getter / setter functions for each individual + * event. + */ +#undef E + +/* + * The function checks in debug mode whether the thread event counters are in + * a consistent state, which forms the invariants before and after each round + * of thread event handling that we can rely on and need to promise. + * The invariants are only temporarily violated in the middle of: + * (a) thread_event() if an event is triggered (the thread_event_trigger() call + * at the end will restore the invariants), + * (b) thread_##event##_event_update() (the thread_event_update() call at the + * end will restore the invariants), or + * (c) thread_event_rollback() if the rollback falls below the last_event (the + * thread_event_update() call at the end will restore the invariants). 
+ */ +JEMALLOC_ALWAYS_INLINE void +thread_event_assert_invariants(tsd_t *tsd) { + if (config_debug) { + thread_event_assert_invariants_debug(tsd); + } +} + +JEMALLOC_ALWAYS_INLINE void +thread_event(tsd_t *tsd, size_t usize) { + thread_event_assert_invariants(tsd); + + uint64_t thread_allocated_before = thread_allocated_get(tsd); + thread_allocated_set(tsd, thread_allocated_before + usize); + + /* The subtraction is intentionally susceptible to underflow. */ + if (likely(usize < thread_allocated_next_event_get(tsd) - + thread_allocated_before)) { + thread_event_assert_invariants(tsd); + } else { + thread_event_trigger(tsd, false); + } +} + +#define E(event, condition) \ +JEMALLOC_ALWAYS_INLINE void \ +thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + thread_event_assert_invariants(tsd); \ + assert(condition); \ + assert(tsd_nominal(tsd)); \ + assert(tsd_reentrancy_level_get(tsd) == 0); \ + assert(event_wait > 0U); \ + if (THREAD_EVENT_MIN_START_WAIT > 1U && \ + unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \ + event_wait = THREAD_EVENT_MIN_START_WAIT; \ + } \ + if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \ + event_wait = THREAD_EVENT_MAX_START_WAIT; \ + } \ + event##_event_wait_set(tsd, event_wait); \ + thread_event_update(tsd); \ +} + +ITERATE_OVER_ALL_EVENTS +#undef E + +#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index e2cc774..14ad53d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -15,39 +15,45 @@ /* * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * --- data accessed on tcache fast path: state, rtree_ctx, stats --- * s: state * e: tcache_enabled * m: thread_allocated + * k: thread_allocated_next_event_fast * f: thread_deallocated - * b: bytes_until_sample (config_prof) - * p: prof_tdata (config_prof) * c: rtree_ctx (rtree cache accessed on deallocation) * t: tcache * --- data not accessed on tcache fast path: arena-related fields --- * d: arenas_tdata_bypass * r: reentrancy_level * x: narenas_tdata + * l: thread_allocated_last_event + * j: thread_allocated_next_event + * w: prof_sample_event_wait (config_prof) + * p: prof_tdata (config_prof) * v: offset_state * i: iarena * a: arena * o: arenas_tdata + * b: binshards * Loading TSD data is on the critical path of basically all malloc operations. * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx vvvvvvvv mmmmmmmm ffffffff bbbbbbbb pppppppp [c * 16 .......] | + * | sedrxxxx mmmmmmmm kkkkkkkk ffffffff [c * 32 ........ ........ .......] | * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 48 ........ ........ ........ ........ .......] iiiiiiii aaaaaaaa | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp | * +---------------------------- 4th cacheline ----------------------------+ - * | oooooooo [t...... ........ ........ ........ ........ ........ ........ | + * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ 
........ ........ | + * +---------------------------- 5th cacheline ----------------------------+ + * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * The last 3 members (i, a and o) before tcache isn't really needed on tcache + * The elements after rtree_ctx and before tcache aren't really needed on tcache * fast path. However we have a number of unused tcache bins and witnesses * (never touched unless config_debug) at the end of tcache, so we place them * there to avoid breaking the cachelines and possibly paging in an extra page. @@ -64,18 +70,21 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif -/* O(name, type, nullable type */ +/* O(name, type, nullable type) */ #define MALLOC_TSD \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ - O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ - O(bytes_until_sample, int64_t, int64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(offset_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ @@ -84,25 +93,34 @@ typedef void (*test_callback_t)(int *); O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD +/* + * THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize. 
+ */ +#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U) +#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX + #define TSD_INITIALIZER { \ - ATOMIC_INIT(tsd_state_uninitialized), \ - TCACHE_ENABLED_ZERO_INITIALIZER, \ - false, \ - 0, \ - 0, \ - 0, \ - 0, \ - 0, \ - 0, \ - NULL, \ - RTREE_CTX_ZERO_INITIALIZER, \ - NULL, \ - NULL, \ - NULL, \ - TSD_BINSHARDS_ZERO_INITIALIZER, \ - TCACHE_ZERO_INITIALIZER, \ - WITNESS_TSD_INITIALIZER \ - MALLOC_TEST_TSD_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* arenas_tdata_bypass */ false, \ + /* reentrancy_level */ 0, \ + /* narenas_tdata */ 0, \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated */ 0, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_tdata */ NULL, \ + /* offset_state */ 0, \ + /* iarena */ NULL, \ + /* arena */ NULL, \ + /* arenas_tdata */ NULL, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER \ } void *malloc_tsd_malloc(size_t size); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a968338..5838e93 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -63,15 +63,16 @@ + + - {8D6BB292-9E1C-413D-9F98-4864BDC1514A} diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index bc40883..3551ba5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -25,6 +28,9 @@ Source Files + + Source Files + Source Files @@ -46,6 +52,9 @@ Source Files + + Source Files + Source Files @@ -76,6 +85,9 @@ Source Files + + Source Files + Source Files @@ -91,6 +103,9 @@ Source Files + + Source Files + Source Files @@ -100,17 +115,5 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 72a57e5..b9d4f68 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -63,15 +63,16 @@ + + - {8D6BB292-9E1C-413D-9F98-4864BDC1514A} diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 622b93f..3551ba5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -25,6 +28,9 @@ Source Files + + Source Files + Source Files @@ -46,6 +52,9 @@ Source Files + + Source Files + Source Files @@ -76,6 +85,9 @@ Source Files + + Source Files + Source Files @@ -88,28 +100,19 @@ Source Files - - Source Files - - - Source Files - - + Source Files - + Source Files - - Source Files - - + Source Files - + Source Files - + Source Files diff --git a/src/jemalloc.c b/src/jemalloc.c index 88064df..63a1e30 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -18,6 +18,7 @@ #include 
"jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -1530,6 +1531,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); + thread_event_boot(); sz_boot(&sc_data); bin_info_boot(&sc_data, bin_shard_sizes); @@ -2128,6 +2130,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } + thread_event(tsd, usize); + /* * If dopts->alignment > 0, then ind is still 0, but usize was computed * in the previous if statement. Down the positive alignment path, @@ -2136,20 +2140,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { - /* - * The fast path modifies bytes_until_sample regardless of - * prof_active. We reset it to be the sample interval, so that - * there won't be excessive routings to the slow path, and that - * when prof_active is turned on later, the counting for - * sampling can immediately resume as normal (though the very - * first sampling interval is not randomized). - */ - if (unlikely(tsd_bytes_until_sample_get(tsd) < 0) && - !prof_active_get_unlocked()) { - tsd_bytes_until_sample_set(tsd, - (ssize_t)(1 << lg_prof_sample)); - } - prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); @@ -2167,24 +2157,17 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { + thread_event_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); - /* - * The fast path modifies bytes_until_sample regardless of - * opt_prof. We reset it to a huge value here, so as to - * minimize the triggering for slow path. - */ - if (config_prof && - unlikely(tsd_bytes_until_sample_get(tsd) < 0)) { - tsd_bytes_until_sample_set(tsd, SSIZE_MAX); - } allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { + thread_event_rollback(tsd, usize); goto label_oom; } } @@ -2197,7 +2180,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - *tsd_thread_allocatedp_get(tsd) += usize; if (sopts->slow) { UTRACE(0, size, allocation); @@ -2373,7 +2355,12 @@ je_malloc(size_t size) { } szind_t ind = sz_size2index_lookup(size); - /* usize is always needed to increment thread_allocated. */ + /* + * The thread_allocated counter in tsd serves as a general purpose + * accumulator for bytes of allocation to trigger different types of + * events. usize is always needed to advance thread_allocated, though + * it's not always needed in the core allocation logic. + */ size_t usize = sz_index2size(ind); /* * Fast path relies on size being a bin. @@ -2382,19 +2369,12 @@ je_malloc(size_t size) { assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); - if (config_prof) { - int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); - bytes_until_sample -= usize; - tsd_bytes_until_sample_set(tsd, bytes_until_sample); - - if (unlikely(bytes_until_sample < 0)) { - /* - * Avoid a prof_active check on the fastpath. 
- * If prof_active is false, bytes_until_sample will be - * reset in slow path. - */ - return malloc_default(size); - } + uint64_t thread_allocated_after = thread_allocated_get(tsd) + usize; + assert(thread_allocated_next_event_fast_get(tsd) <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + if (unlikely(thread_allocated_after >= + thread_allocated_next_event_fast_get(tsd))) { + return malloc_default(size); } cache_bin_t *bin = tcache_small_bin_get(tcache, ind); @@ -2402,7 +2382,7 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { - *tsd_thread_allocatedp_get(tsd) += usize; + thread_allocated_set(tsd, thread_allocated_after); if (config_stats) { bin->tstats.nrequests++; } @@ -3116,9 +3096,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } + thread_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { + thread_event_rollback(tsd, usize); goto label_oom; } } else { @@ -3128,10 +3110,10 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); + thread_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; UTRACE(ptr, size, p); @@ -3307,6 +3289,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } + thread_event(tsd, usize_max); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3316,6 +3299,18 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } + if (usize <= usize_max) { + thread_event_rollback(tsd, usize_max - usize); + } else { + /* + * For downsizing request, usize_max can be less than usize. + * We here further increase thread event counters so as to + * record the true usize, and then when the execution goes back + * to xallocx(), the entire usize will be rolled back if it's + * equal to the old usize. 
+ */ + thread_event(tsd, usize - usize_max); + } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return usize; @@ -3373,12 +3368,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); + thread_event(tsd, usize); } if (unlikely(usize == old_usize)) { + thread_event_rollback(tsd, usize); goto label_not_resized; } - *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; label_not_resized: diff --git a/src/prof.c b/src/prof.c index fc0c7d8..7e219dc 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/thread_event.h" /* * This file implements the profiling "APIs" needed by other parts of jemalloc, @@ -471,8 +472,11 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { return; } + tsd_t *tsd = tsd_fetch(); + if (lg_prof_sample == 0) { - tsd_bytes_until_sample_set(tsd_fetch(), 0); + thread_prof_sample_event_update(tsd, + THREAD_EVENT_MIN_START_WAIT); return; } @@ -480,11 +484,11 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * Compute sample interval as a geometrically distributed random * variable with mean (2^lg_prof_sample). * - * __ __ - * | log(u) | 1 - * tdata->bytes_until_sample = | -------- |, where p = --------------- - * | log(1-p) | lg_prof_sample - * 2 + * __ __ + * | log(u) | 1 + * bytes_until_sample = | -------- |, where p = --------------- + * | log(1-p) | lg_prof_sample + * 2 * * For more information on the math, see: * @@ -499,10 +503,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - if (bytes_until_sample > SSIZE_MAX) { - bytes_until_sample = SSIZE_MAX; - } - tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); + thread_prof_sample_event_update(tsd, bytes_until_sample); #endif } diff --git a/src/thread_event.c b/src/thread_event.c new file mode 100644 index 0000000..c6542f4 --- /dev/null +++ b/src/thread_event.c @@ -0,0 +1,255 @@ +#define JEMALLOC_THREAD_EVENT_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/thread_event.h" + +/* + * There's no lock for thread_event_active because write is only done in + * malloc_init(), where init_lock there serves as the guard, and ever since + * then thread_event_active becomes read only. + */ +static bool thread_event_active = false; + +/* Event handler function signatures. 
*/ +#define E(event, condition) \ +static void thread_##event##_event_handler(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + +static uint64_t +thread_allocated_next_event_compute(tsd_t *tsd) { + uint64_t wait = THREAD_EVENT_MAX_START_WAIT; + bool no_event_on = true; + +#define E(event, condition) \ + if (condition) { \ + no_event_on = false; \ + uint64_t event_wait = \ + event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > 0U && event_wait < wait) { \ + wait = event_wait; \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + assert(no_event_on == !thread_event_active); + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + return wait; +} + +void +thread_event_assert_invariants_debug(tsd_t *tsd) { + uint64_t thread_allocated = thread_allocated_get(tsd); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t next_event = thread_allocated_next_event_get(tsd); + uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); + + assert(last_event != next_event); + if (next_event <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) { + assert(next_event_fast == next_event); + } else { + assert(next_event_fast == 0U); + } + + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t interval = next_event - last_event; + + /* The subtraction is intentionally susceptible to underflow. */ + assert(thread_allocated - last_event < interval); + + uint64_t min_wait = thread_allocated_next_event_compute(tsd); + + /* + * next_event should have been pushed up only except when no event is + * on and the TSD is just initialized. The last_event == 0U guard + * below is stronger than needed, but having an exactly accurate guard + * is more complicated to implement. + */ + assert((!thread_event_active && last_event == 0U) || + interval == min_wait || + (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); +} + +static void +thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= + THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); + thread_allocated_next_event_set(tsd, next_event); + uint64_t next_event_fast = (next_event <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + thread_allocated_next_event_fast_set(tsd, next_event_fast); +} + +static void +thread_prof_sample_event_handler(tsd_t *tsd) { + assert(config_prof && opt_prof); + assert(prof_sample_event_wait_get(tsd) == 0U); + if (!prof_active_get_unlocked()) { + /* + * If prof_active is off, we reset prof_sample_event_wait to be + * the sample interval when it drops to 0, so that there won't + * be excessive routings to the slow path, and that when + * prof_active is turned on later, the counting for sampling + * can immediately resume as normal. 
+ */ + thread_prof_sample_event_update(tsd, + (uint64_t)(1 << lg_prof_sample)); + } +} + +static uint64_t +thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, + bool allow_event_trigger) { + uint64_t wait = THREAD_EVENT_MAX_START_WAIT; + +#define E(event, condition) \ + if (condition) { \ + uint64_t event_wait = event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > accumbytes) { \ + event_wait -= accumbytes; \ + } else { \ + event_wait = 0U; \ + if (!allow_event_trigger) { \ + event_wait = \ + THREAD_EVENT_MIN_START_WAIT; \ + } \ + } \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + event##_event_wait_set(tsd, event_wait); \ + /* \ + * If there is a single event, then the remaining wait \ + * time may become zero, and we rely on either the \ + * event handler or a thread_event_update() call later \ + * to properly set next_event; if there are multiple \ + * events, then here we can get the minimum remaining \ + * wait time to the next already set event. \ + */ \ + if (event_wait > 0U && event_wait < wait) { \ + wait = event_wait; \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + return wait; +} + +void +thread_event_trigger(tsd_t *tsd, bool delay_event) { + /* usize has already been added to thread_allocated. */ + uint64_t thread_allocated_after = thread_allocated_get(tsd); + + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t accumbytes = thread_allocated_after - + thread_allocated_last_event_get(tsd); + + /* Make sure that accumbytes cannot overflow uint64_t. */ + cassert(THREAD_EVENT_MAX_INTERVAL <= + UINT64_MAX - SC_LARGE_MAXCLASS + 1); + + thread_allocated_last_event_set(tsd, thread_allocated_after); + bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && + tsd_reentrancy_level_get(tsd) == 0; + uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, + allow_event_trigger); + thread_event_adjust_thresholds_helper(tsd, wait); + + thread_event_assert_invariants(tsd); + +#define E(event, condition) \ + if (condition && event##_event_wait_get(tsd) == 0U) { \ + assert(allow_event_trigger); \ + thread_##event##_event_handler(tsd); \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + thread_event_assert_invariants(tsd); +} + +void +thread_event_rollback(tsd_t *tsd, size_t diff) { + thread_event_assert_invariants(tsd); + + if (diff == 0U) { + return; + } + + uint64_t thread_allocated = thread_allocated_get(tsd); + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t thread_allocated_rollback = thread_allocated - diff; + thread_allocated_set(tsd, thread_allocated_rollback); + + uint64_t last_event = thread_allocated_last_event_get(tsd); + /* Both subtractions are intentionally susceptible to underflow. */ + if (thread_allocated_rollback - last_event <= + thread_allocated - last_event) { + thread_event_assert_invariants(tsd); + return; + } + + thread_allocated_last_event_set(tsd, thread_allocated_rollback); + + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t wait_diff = last_event - thread_allocated_rollback; + assert(wait_diff <= diff); + +#define E(event, condition) \ + if (condition) { \ + uint64_t event_wait = event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > 0U) { \ + if (wait_diff > \ + THREAD_EVENT_MAX_START_WAIT - event_wait) { \ + event_wait = \ + THREAD_EVENT_MAX_START_WAIT; \ + } else { \ + event_wait += wait_diff; \ + } \ + assert(event_wait <= \ + THREAD_EVENT_MAX_START_WAIT); \ + event##_event_wait_set(tsd, event_wait); \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + thread_event_update(tsd); +} + +void +thread_event_update(tsd_t *tsd) { + uint64_t wait = thread_allocated_next_event_compute(tsd); + thread_event_adjust_thresholds_helper(tsd, wait); + + uint64_t last_event = thread_allocated_last_event_get(tsd); + + /* Both subtractions are intentionally susceptible to underflow. */ + if (thread_allocated_get(tsd) - last_event >= + thread_allocated_next_event_get(tsd) - last_event) { + thread_event_trigger(tsd, true); + } else { + thread_event_assert_invariants(tsd); + } +} + +void thread_event_boot() { +#define E(event, condition) \ + if (condition) { \ + thread_event_active = true; \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E +} diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c new file mode 100644 index 0000000..6817262 --- /dev/null +++ b/test/unit/thread_event.c @@ -0,0 +1,57 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_next_event_fast_roll_back) { + tsd_t *tsd = tsd_fetch(); + thread_allocated_last_event_set(tsd, 0); + thread_allocated_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); + thread_allocated_next_event_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + thread_allocated_next_event_fast_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + prof_sample_event_wait_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + void *p = malloc(16U); + assert_ptr_not_null(p, "malloc() failed"); + free(p); +} +TEST_END + +TEST_BEGIN(test_next_event_fast_resume) { + tsd_t *tsd = tsd_fetch(); + thread_allocated_last_event_set(tsd, 0); + thread_allocated_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); + thread_allocated_next_event_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + thread_allocated_next_event_fast_set(tsd, 0); + prof_sample_event_wait_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + void *p = malloc(SC_LOOKUP_MAXCLASS); + assert_ptr_not_null(p, "malloc() failed"); + free(p); +} +TEST_END + +TEST_BEGIN(test_event_rollback) { + tsd_t *tsd = tsd_fetch(); + const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2; + size_t count = 10; + uint64_t thread_allocated = thread_allocated_get(tsd); + while (count-- != 0) { + thread_event_rollback(tsd, diff); + uint64_t thread_allocated_after = thread_allocated_get(tsd); + assert_u64_eq(thread_allocated - thread_allocated_after, diff, + "thread event counters are not properly rolled back"); + thread_allocated = thread_allocated_after; + } +} +TEST_END + +int +main(void) { + return test( + test_next_event_fast_roll_back, + test_next_event_fast_resume, + test_event_rollback); +} diff --git a/test/unit/thread_event.sh b/test/unit/thread_event.sh new file mode 100644 index 0000000..8fcc7d8 --- /dev/null +++ b/test/unit/thread_event.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi -- cgit v0.12 From 198f02e7972023d10c9e4c4c6ab162738d103707 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: 
Mon, 14 Oct 2019 09:35:51 -0700 Subject: Pull prof_accumbytes into thread event handler --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_a.h | 11 --- .../internal/jemalloc_internal_inlines_b.h | 2 +- include/jemalloc/internal/prof_externs.h | 14 ++-- include/jemalloc/internal/prof_inlines_a.h | 90 ++++++---------------- include/jemalloc/internal/tcache_inlines.h | 6 -- include/jemalloc/internal/tcache_structs.h | 3 +- include/jemalloc/internal/tcache_types.h | 4 +- include/jemalloc/internal/thread_event.h | 3 +- include/jemalloc/internal/tsd.h | 11 ++- src/arena.c | 14 +--- src/jemalloc.c | 3 - src/large.c | 3 - src/prof.c | 83 ++++++++++++++++++-- src/tcache.c | 36 +-------- src/thread_event.c | 40 ++++++---- 16 files changed, 148 insertions(+), 177 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a4523ae..a71f944 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -49,7 +49,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); + cache_bin_t *tbin, szind_t binind); void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 9abf7f6..27434c3 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -21,17 +21,6 @@ arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -static inline bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { - return false; - } - - return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); -} - static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(have_percpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 70d6e57..f0b73d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { if (tcache_available(tsd)) { tcache_t *tcache = tcache_get(tsd); if (tcache->arena != NULL) { - /* See comments in tcache_data_init().*/ + /* See comments in tsd_tcache_data_init().*/ assert(tcache->arena == arena_get(tsd_tsdn(tsd), 0, false)); if (tcache->arena != ret) { diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 7befad6..94fbd75 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -33,13 +33,7 @@ extern bool prof_active; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ extern bool prof_gdump_val; -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. 
The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ +/* Profile dump interval, measured in bytes allocated. */ extern uint64_t prof_interval; /* @@ -50,6 +44,10 @@ extern size_t lg_prof_sample; extern bool prof_booted; +/* Functions only accessed in prof_inlines_a.h */ +bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); +void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -73,7 +71,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); +bool prof_accum_init(tsdn_t *tsdn); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 6716d2f..61773a2 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -3,74 +3,6 @@ #include "jemalloc/internal/mutex.h" -static inline bool -prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, - uint64_t accumbytes) { - cassert(config_prof); - - bool overflow; - uint64_t a0, a1; - - /* - * If the application allocates fast enough (and/or if idump is slow - * enough), extreme overflow here (a1 >= prof_interval * 2) can cause - * idump trigger coalescing. This is an intentional mechanism that - * avoids rate-limiting allocation. - */ -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif - return overflow; -} - -static inline void -prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, - size_t usize) { - cassert(config_prof); - - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif -} - JEMALLOC_ALWAYS_INLINE void prof_active_assert() { cassert(config_prof); @@ -93,4 +25,26 @@ prof_active_get_unlocked(void) { return prof_active; } +JEMALLOC_ALWAYS_INLINE bool +prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return false; + } + + return prof_idump_accum_impl(tsdn, accumbytes); +} + +JEMALLOC_ALWAYS_INLINE void +prof_idump_rollback(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + + prof_idump_rollback_impl(tsdn, usize); +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8988ae9..85c6cc4 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -93,9 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } tcache_event(tsd, tcache); return ret; } @@ -151,9 +148,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } } tcache_event(tsd, tcache); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 008b1f7..98d3ef7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -16,10 +16,9 @@ struct tcache_s { * together at the start of this struct. */ - /* Cleared after arena_prof_accum(). */ - uint64_t prof_accumbytes; /* Drives incremental GC. */ ticker_t gc_ticker; + /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index dce6938..60261fc 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -47,8 +47,8 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) -/* Used in TSD static initializer only. Real init in tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {0} +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER {{0}} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 08678b7..6aa334f 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -44,7 +44,8 @@ void thread_event_boot(); C(thread_allocated_next_event_fast) \ C(thread_allocated_last_event) \ C(thread_allocated_next_event) \ - ITERATE_OVER_ALL_EVENTS + ITERATE_OVER_ALL_EVENTS \ + C(prof_sample_last_event) /* Getters directly wrap TSD getters. 
*/ #define C(counter) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 14ad53d..60500df 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -30,6 +30,7 @@ * l: thread_allocated_last_event * j: thread_allocated_next_event * w: prof_sample_event_wait (config_prof) + * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) * v: offset_state * i: iarena @@ -45,11 +46,11 @@ * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx | * +---------------------------- 4th cacheline ----------------------------+ - * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ | + * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ | * +---------------------------- 5th cacheline ----------------------------+ - * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ | + * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * @@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *); O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(offset_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -109,9 +111,10 @@ typedef void (*test_callback_t)(int *); /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ /* thread_deallocated */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ - /* thread_allocated_last_event */ 0, \ + /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ /* offset_state */ 0, \ /* iarena */ NULL, \ diff --git a/src/arena.c b/src/arena.c index e096f3a..a60a684 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1378,13 +1378,10 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind) { unsigned i, nfill, cnt; assert(cache_bin_ncached_get(tbin, binind) == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { - prof_idump(tsdn); - } tcache->bin_refilled[binind] = true; unsigned binshard; @@ -1484,10 +1481,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { bin->stats.nrequests++; bin->stats.curregs++; } + malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (!zero) { if (config_fill) { @@ -1565,14 +1560,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = arena_get_from_extent(extent); szind_t szind = sz_size2index(usize); 
extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); - prof_accum_cancel(tsdn, &arena->prof_accum, usize); + prof_idump_rollback(tsdn, usize); assert(isalloc(tsdn, ptr) == usize); } @@ -1982,7 +1976,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - if (prof_accum_init(tsdn, &arena->prof_accum)) { + if (prof_accum_init(tsdn)) { goto label_error; } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63a1e30..264b3f3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2386,9 +2386,6 @@ je_malloc(size_t size) { if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } LOG("core.malloc.exit", "result: %p", ret); diff --git a/src/large.c b/src/large.c index 13d8e56..8aaa3ce 100644 --- a/src/large.c +++ b/src/large.c @@ -56,9 +56,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_list_append(&arena->large, extent); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (zero) { assert(is_zeroed); diff --git a/src/prof.c b/src/prof.c index 7e219dc..5360662 100644 --- a/src/prof.c +++ b/src/prof.c @@ -45,6 +45,9 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +/* Accessed via prof_idump_[accum/rollback](). */ +static prof_accum_t prof_idump_accumulated; + /* * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). @@ -586,22 +589,92 @@ prof_fdump(void) { } bool -prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { +prof_accum_init(tsdn_t *tsdn) { cassert(config_prof); #ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum", WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { return true; } - prof_accum->accumbytes = 0; + prof_idump_accumulated.accumbytes = 0; #else - atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); + atomic_store_u64(&prof_idump_accumulated.accumbytes, 0, + ATOMIC_RELAXED); #endif return false; } bool +prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. + */ +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif + return overflow; +} + +void +prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive accumbytes increase as possible + * without underflowing. 
Interval-triggered dumps occur slightly more + * often than intended as a result of incomplete canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif +} + +bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); @@ -641,7 +714,7 @@ prof_idump(tsdn_t *tsdn) { return; } - tdata = prof_tdata_get(tsd, false); + tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return; } diff --git a/src/tcache.c b/src/tcache.c index e17b67a..7758c4f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -106,11 +106,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena != NULL); - arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); - if (config_prof) { - tcache->prof_accumbytes = 0; - } + arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); ret = cache_bin_alloc_easy(tbin, tcache_success, binind); return ret; @@ -181,14 +177,6 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; @@ -274,11 +262,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned locked_arena_ind = extent_arena_ind_get(extent); arena_t *locked_arena = arena_get(tsd_tsdn(tsd), locked_arena_ind, false); - bool idump; - - if (config_prof) { - idump = false; - } bool lock_large = !arena_is_auto(locked_arena); if (lock_large) { @@ -295,11 +278,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t } if ((config_prof || config_stats) && (locked_arena == tcache_arena)) { - if (config_prof) { - idump = arena_prof_accum(tsd_tsdn(tsd), - tcache_arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; - } if (config_stats) { merged_stats = true; arena_stats_large_flush_nrequests_add( @@ -332,9 +310,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t ndeferred++; } } - if (config_prof && idump) { - prof_idump(tsd_tsdn(tsd)); - } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); nflush = ndeferred; @@ -462,7 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { assert(!tcache_bin_lowbits_overflowable(avail_stack)); memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); - tcache->prof_accumbytes = 0; tcache->next_gc_bin = 0; tcache->arena = NULL; @@ -590,14 +564,6 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - - if (config_prof && tcache->prof_accumbytes > 0) { - if 
(arena_prof_accum(tsd_tsdn(tsd), tcache->arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } } void diff --git a/src/thread_event.c b/src/thread_event.c index c6542f4..312dff2 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -18,6 +18,29 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +static void +thread_prof_sample_event_handler(tsd_t *tsd) { + assert(config_prof && opt_prof); + assert(prof_sample_event_wait_get(tsd) == 0U); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = prof_sample_last_event_get(tsd); + prof_sample_last_event_set(tsd, last_event); + if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { + prof_idump(tsd_tsdn(tsd)); + } + if (!prof_active_get_unlocked()) { + /* + * If prof_active is off, we reset prof_sample_event_wait to be + * the sample interval when it drops to 0, so that there won't + * be excessive routings to the slow path, and that when + * prof_active is turned on later, the counting for sampling + * can immediately resume as normal. + */ + thread_prof_sample_event_update(tsd, + (uint64_t)(1 << lg_prof_sample)); + } +} + static uint64_t thread_allocated_next_event_compute(tsd_t *tsd) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; @@ -86,23 +109,6 @@ thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { thread_allocated_next_event_fast_set(tsd, next_event_fast); } -static void -thread_prof_sample_event_handler(tsd_t *tsd) { - assert(config_prof && opt_prof); - assert(prof_sample_event_wait_get(tsd) == 0U); - if (!prof_active_get_unlocked()) { - /* - * If prof_active is off, we reset prof_sample_event_wait to be - * the sample interval when it drops to 0, so that there won't - * be excessive routings to the slow path, and that when - * prof_active is turned on later, the counting for sampling - * can immediately resume as normal. 
- */ - thread_prof_sample_event_update(tsd, - (uint64_t)(1 << lg_prof_sample)); - } -} - static uint64_t thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, bool allow_event_trigger) { -- cgit v0.12 From 97f93fa0f2d7343d308bbcd5cf551492d5652d0a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 24 Oct 2019 16:41:45 -0700 Subject: Pull tcache GC events into thread event handler --- include/jemalloc/internal/tcache_inlines.h | 2 -- include/jemalloc/internal/tcache_types.h | 5 ++++- include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/tsd.h | 9 ++++++--- src/jemalloc.c | 4 ---- src/thread_event.c | 11 +++++++++++ src/tsd.c | 4 ++++ test/unit/thread_event.c | 10 ++++++++-- 8 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 85c6cc4..40c4286 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -93,7 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (config_stats) { bin->tstats.nrequests++; } - tcache_event(tsd, tcache); return ret; } @@ -150,7 +149,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } } - tcache_event(tsd, tcache); return ret; } diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 60261fc..9fd3926 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -43,10 +43,13 @@ typedef struct tcaches_s tcaches_t; */ #define TCACHE_GC_SWEEP 8192 -/* Number of tcache allocation/deallocation events between incremental GCs. */ +/* Number of tcache deallocation events between incremental GCs. */ #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) +/* Number of allocation bytes between tcache incremental GCs. */ +#define TCACHE_GC_INCR_BYTES 65536U + /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {{0}} diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 6aa334f..3da9f0a 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,6 +33,7 @@ void thread_event_boot(); * E(event, (condition)) */ #define ITERATE_OVER_ALL_EVENTS \ + E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ E(prof_sample, (config_prof && opt_prof)) #define E(event, condition) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 60500df..17bfc88 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -29,6 +29,7 @@ * x: narenas_tdata * l: thread_allocated_last_event * j: thread_allocated_next_event + * g: tcache_gc_event_wait * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) @@ -46,11 +47,11 @@ * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj gggggggg wwwwwwww | * +---------------------------- 4th cacheline ----------------------------+ - * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ 
| + * | xxxxxxxx pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ | * +---------------------------- 5th cacheline ----------------------------+ - * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ | + * | ........ ........ ..b][t.. ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * @@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *); O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(tcache_gc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ @@ -113,6 +115,7 @@ typedef void (*test_callback_t)(int *); /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 264b3f3..1073512 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2350,10 +2350,6 @@ je_malloc(size_t size) { tcache_t *tcache = tsd_tcachep_get(tsd); - if (unlikely(ticker_trytick(&tcache->gc_ticker))) { - return malloc_default(size); - } - szind_t ind = sz_size2index_lookup(size); /* * The thread_allocated counter in tsd serves as a general purpose diff --git a/src/thread_event.c b/src/thread_event.c index 312dff2..33d669a 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -19,6 +19,17 @@ ITERATE_OVER_ALL_EVENTS #undef E static void +thread_tcache_gc_event_handler(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + assert(tcache_gc_event_wait_get(tsd) == 0U); + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_event_hard(tsd, tcache); + } +} + +static void thread_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); assert(prof_sample_event_wait_get(tsd) == 0U); diff --git a/src/tsd.c b/src/tsd.c index a31f6b9..3fa43d3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,6 +233,10 @@ tsd_data_init(tsd_t *tsd) { *tsd_offset_statep_get(tsd) = config_debug ? 
0 : (uint64_t)(uintptr_t)tsd; + if (TCACHE_GC_INCR_BYTES > 0) { + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + } + return tsd_tcache_enabled_data_init(tsd); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 6817262..cf5b2e5 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -9,8 +9,11 @@ TEST_BEGIN(test_next_event_fast_roll_back) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); thread_allocated_next_event_fast_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - prof_sample_event_wait_set(tsd, +#define E(event, condition) \ + event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + ITERATE_OVER_ALL_EVENTS +#undef E void *p = malloc(16U); assert_ptr_not_null(p, "malloc() failed"); free(p); @@ -25,8 +28,11 @@ TEST_BEGIN(test_next_event_fast_resume) { thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); thread_allocated_next_event_fast_set(tsd, 0); - prof_sample_event_wait_set(tsd, +#define E(event, condition) \ + event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + ITERATE_OVER_ALL_EVENTS +#undef E void *p = malloc(SC_LOOKUP_MAXCLASS); assert_ptr_not_null(p, "malloc() failed"); free(p); -- cgit v0.12 From 43f0ce92d881f945da54a498cadc654ddb9403a1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 1 Nov 2019 14:11:59 -0700 Subject: Define general purpose tsd_thread_event_init() --- include/jemalloc/internal/thread_event.h | 1 + src/thread_event.c | 29 +++++++++++++++++++++++++++++ src/tsd.c | 4 +--- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3da9f0a..8a05eae 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -27,6 +27,7 @@ void thread_event_trigger(tsd_t *tsd, bool delay_event); void thread_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd); void thread_event_boot(); +void tsd_thread_event_init(tsd_t *tsd); /* * List of all events, in the following format: diff --git a/src/thread_event.c b/src/thread_event.c index 33d669a..f27a37a 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -11,6 +11,13 @@ */ static bool thread_event_active = false; +/* TSD event init function signatures. */ +#define E(event, condition) \ +static void tsd_thread_##event##_event_init(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + /* Event handler function signatures. */ #define E(event, condition) \ static void thread_##event##_event_handler(tsd_t *tsd); @@ -19,6 +26,18 @@ ITERATE_OVER_ALL_EVENTS #undef E static void +tsd_thread_tcache_gc_event_init(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); +} + +static void +tsd_thread_prof_sample_event_init(tsd_t *tsd) { + assert(config_prof && opt_prof); + /* Do not set sample interval until the first allocation. */ +} + +static void thread_tcache_gc_event_handler(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); assert(tcache_gc_event_wait_get(tsd) == 0U); @@ -270,3 +289,13 @@ void thread_event_boot() { ITERATE_OVER_ALL_EVENTS #undef E } + +void tsd_thread_event_init(tsd_t *tsd) { +#define E(event, condition) \ + if (condition) { \ + tsd_thread_##event##_event_init(tsd); \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E +} diff --git a/src/tsd.c b/src/tsd.c index 3fa43d3..bb40af1 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,9 +233,7 @@ tsd_data_init(tsd_t *tsd) { *tsd_offset_statep_get(tsd) = config_debug ? 
0 : (uint64_t)(uintptr_t)tsd; - if (TCACHE_GC_INCR_BYTES > 0) { - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); - } + tsd_thread_event_init(tsd); return tsd_tcache_enabled_data_init(tsd); } -- cgit v0.12 From a8b578d538adced7506aec1179379eb541c0198d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 5 Nov 2019 06:46:52 -0800 Subject: Remove mallctl test for zero_realloc --- test/unit/mallctl.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4c0830f..ebbaed7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -178,6 +178,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); TEST_MALLCTL_OPT(const char *, thp, always); + TEST_MALLCTL_OPT(const char *, zero_realloc, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); @@ -880,16 +881,6 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END -TEST_BEGIN(test_zero_realloc) { - const char *val; - size_t sz = sizeof(val); - int err = mallctl("opt.zero_realloc", &val, &sz, NULL, 0); - assert_d_eq(err, 0, "Unexpected mallctl result"); - assert_str_eq(val, "strict", - "Unexpected default zero_realloc_beahvior"); -} -TEST_END - int main(void) { return test( @@ -921,6 +912,5 @@ main(void) { test_prof_active, test_stats_arenas, test_hooks, - test_hooks_exhaustion, - test_zero_realloc); + test_hooks_exhaustion); } -- cgit v0.12 From d01b425e5d1e1ed3d7f7c5571002681469acf601 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Oct 2019 13:03:41 -0700 Subject: Add -Wimplicit-fallthrough checks if supported Clang since r369414 (clang-10) can now check -Wimplicit-fallthrough for C code, and use the GNU C style attribute to denote fallthrough. Move the test from header only to autoconf. The previous test used brittle version detection which did not work for newer clang that supported this feature. The attribute has to be its own statement, hence the added `;`. It also can only precede case statements, so the final cases should be explicitly terminated with break statements. Fixes commit 3d29d11ac2c1 ("Clean compilation -Wextra") Link: https://github.com/llvm/llvm-project/commit/1e0affb6e564b7361b0aadb38805f26deff4ecfc Signed-off-by: Nick Desaulniers --- configure.ac | 20 +++++++ include/jemalloc/internal/hash.h | 65 +++++++++++----------- .../jemalloc/internal/jemalloc_internal_macros.h | 7 --- include/jemalloc/jemalloc_defs.h.in | 3 + include/jemalloc/jemalloc_macros.h.in | 7 +++ src/arena.c | 2 +- src/malloc_io.c | 6 +- src/tsd.c | 2 +- 8 files changed, 68 insertions(+), 44 deletions(-) diff --git a/configure.ac b/configure.ac index bca422a..c3f53f7 100644 --- a/configure.ac +++ b/configure.ac @@ -866,6 +866,26 @@ if test "x${je_cv_format_arg}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) fi +dnl Check for fallthrough attribute support. 
+JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Wimplicit-fallthrough]) +JE_COMPILABLE([fallthrough attribute], + [#if !__has_attribute(fallthrough) + #error "foo" + #endif], + [int x = 0; + switch (x) { + case 0: __attribute__((__fallthrough__)); + case 1: return 1; + }], + [je_cv_fallthrough]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_fallthrough}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ]) + JE_CFLAGS_ADD([-Wimplicit-fallthrough]) + JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 0270034..9132b60 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH + case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; } @@ -177,29 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed, uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH - case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH + case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; + case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - JEMALLOC_FALLTHROUGH - case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH - case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH - case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH; + case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; + case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH; case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - JEMALLOC_FALLTHROUGH - case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH - case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH - case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + JEMALLOC_FALLTHROUGH; + case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH; + case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; + case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - JEMALLOC_FALLTHROUGH - case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH - case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH; + case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[ 0] << 0; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - JEMALLOC_FALLTHROUGH + break; } } @@ -261,24 +261,25 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; 
JEMALLOC_FALLTHROUGH - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH; + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH; case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - JEMALLOC_FALLTHROUGH - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + break; } } diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index d8ea06f..ece3b87 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -40,13 +40,6 @@ #define JEMALLOC_VA_ARGS_HEAD(head, ...) head #define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__ -#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \ - && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7) -#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough); -#else -#define JEMALLOC_FALLTHROUGH /* falls through */ -#endif - /* Diagnostic suppression macros */ #if defined(_MSC_VER) && !defined(__clang__) # define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 11c3918..032fba4 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -13,6 +13,9 @@ /* Defined if format(printf, ...) attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF +/* Defined if fallthrough attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FALLTHROUGH + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. 
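
For reference, the convention that the hash.h, malloc_io.c, arena.c, and tsd.c
hunks converge on can be shown in a standalone sketch. This is illustrative
only: sum_tail() is not part of the tree, and the macro definition here merely
stands in for the real one added to jemalloc_macros.h.in below; it assumes a
compiler for which the configure probe above succeeds.

    #include <stdint.h>

    /* Stand-in for the macro defined in jemalloc_macros.h.in. */
    #define JEMALLOC_FALLTHROUGH __attribute__((__fallthrough__))

    static uint32_t
    sum_tail(const uint8_t *tail, int len) {
        uint32_t k = 0;
        switch (len & 3) {
        /* The annotation is a statement of its own, hence the trailing ';'. */
        case 3: k ^= (uint32_t)tail[2] << 16; JEMALLOC_FALLTHROUGH;
        case 2: k ^= (uint32_t)tail[1] << 8; JEMALLOC_FALLTHROUGH;
        /* Nothing follows the last case, so it ends with a plain break. */
        case 1: k ^= (uint32_t)tail[0]; break;
        }
        return k;
    }

Dropping either annotation makes -Wimplicit-fallthrough fire; the final case
cannot carry the annotation (no case follows it), so it is terminated with an
explicit break instead. The jemalloc_macros.h.in hunk below supplies the real
per-compiler definitions.
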
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 59e2955..b4469d8 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -71,6 +71,7 @@ # endif # define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_FALLTHROUGH # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus # define JEMALLOC_NOTHROW __declspec(nothrow) @@ -109,6 +110,11 @@ # else # define JEMALLOC_FORMAT_PRINTF(s, i) # endif +# ifdef JEMALLOC_HAVE_ATTR_FALLTHROUGH +# define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough) +# else +# define JEMALLOC_FALLTHROUGH +# endif # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) # define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) @@ -121,6 +127,7 @@ # define JEMALLOC_ALLOC_SIZE2(s1, s2) # define JEMALLOC_EXPORT # define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_FALLTHROUGH # define JEMALLOC_NOINLINE # define JEMALLOC_NOTHROW # define JEMALLOC_SECTION(s) diff --git a/src/arena.c b/src/arena.c index a60a684..e4dd477 100644 --- a/src/arena.c +++ b/src/arena.c @@ -874,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, arena, is_background_thread); break; } - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case extent_state_muzzy: extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent); diff --git a/src/malloc_io.c b/src/malloc_io.c index 2fae757..fc7ff72 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -135,10 +135,10 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { break; case '-': neg = true; - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case '+': p++; - /* Fall through. */ + JEMALLOC_FALLTHROUGH; default: goto label_prefix; } @@ -289,7 +289,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) { if (!neg) { break; } - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case ' ': case '+': s--; diff --git a/src/tsd.c b/src/tsd.c index bb40af1..6c90ade 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -389,7 +389,7 @@ tsd_cleanup(void *arg) { * is still called for testing and completeness. */ assert_tsd_data_cleanup_done(tsd); - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case tsd_state_nominal: case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); -- cgit v0.12 From 19a51abf337d35b3bdbbac22d8c513f4fd8b6c57 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 16:44:37 -0800 Subject: Avoid arena->offset_state when tsd not available for prng. Use stack locals and remove the offset_state in arena. --- include/jemalloc/internal/arena_structs.h | 8 -------- src/arena.c | 12 ------------ src/extent.c | 4 ++-- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 54889dc..bc8c039 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -119,14 +119,6 @@ struct arena_s { prof_accum_t prof_accum; /* - * PRNG state for cache index randomization of large allocation base - * pointers. - * - * Synchronization: atomic. - */ - atomic_zu_t offset_state; - - /* * Extent serial number generator state. * * Synchronization: atomic. 
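
The replacement for the removed field appears in the extent.c hunk further
down: when no tsd is available, the random offset is drawn from a throwaway
PRNG state seeded with the address of a stack local, rather than from
arena->offset_state. A minimal sketch of that fallback path, assuming the
usual jemalloc internal includes (offset_fallback() itself is illustrative and
not part of the patch; prng_lg_range_u64() is the existing PRNG helper):

    #include "jemalloc/internal/jemalloc_preamble.h"
    #include "jemalloc/internal/jemalloc_internal_includes.h"

    /*
     * Illustrative only: a single PRNG draw seeded from a stack address is
     * nondeterministic enough for cache-index randomization and needs no
     * shared, atomically updated arena state.
     */
    static size_t
    offset_fallback(unsigned lg_range) {
        size_t r;
        uint64_t stack_value = (uint64_t)(uintptr_t)&r;
        r = (size_t)prng_lg_range_u64(&stack_value, lg_range);
        return r;
    }

Because no per-arena state is consulted anymore, the arena.c hunk below also
deletes the config_cache_oblivious block that seeded offset_state at arena
creation time.
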
diff --git a/src/arena.c b/src/arena.c index e4dd477..fa18d14 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1981,18 +1981,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - if (config_cache_oblivious) { - /* - * A nondeterministic seed based on the address of arena reduces - * the likelihood of lockstep non-uniform cache index - * utilization among identical concurrent processes, but at the - * cost of test repeatability. For debug builds, instead use a - * deterministic seed. - */ - atomic_store_zu(&arena->offset_state, config_debug ? ind : - (size_t)(uintptr_t)arena, ATOMIC_RELAXED); - } - atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), diff --git a/src/extent.c b/src/extent.c index 4bb358d..50a8105 100644 --- a/src/extent.c +++ b/src/extent.c @@ -187,8 +187,8 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, r = (size_t)prng_lg_range_u64( tsd_offset_statep_get(tsd), lg_range); } else { - r = prng_lg_range_zu(&arena->offset_state, lg_range, - true); + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); -- cgit v0.12 From bc774a3519788bec8b18f0a5988767fc11d034fa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 16:48:12 -0800 Subject: Rename tsd->offset_state to tsd->prng_state. --- include/jemalloc/internal/tsd.h | 6 +++--- src/extent.c | 2 +- src/tsd.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 17bfc88..6332a00 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -33,7 +33,7 @@ * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) - * v: offset_state + * v: prng_state * i: iarena * a: arena * o: arenas_tdata @@ -88,7 +88,7 @@ typedef void (*test_callback_t)(int *); O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(offset_state, uint64_t, uint64_t) \ + O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ @@ -119,7 +119,7 @@ typedef void (*test_callback_t)(int *); /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ - /* offset_state */ 0, \ + /* prng_state */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ diff --git a/src/extent.c b/src/extent.c index 50a8105..d9eff76 100644 --- a/src/extent.c +++ b/src/extent.c @@ -185,7 +185,7 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (!tsdn_null(tsdn)) { tsd_t *tsd = tsdn_tsd(tsdn); r = (size_t)prng_lg_range_u64( - tsd_offset_statep_get(tsd), lg_range); + tsd_prng_statep_get(tsd), lg_range); } else { uint64_t stack_value = (uint64_t)(uintptr_t)&r; r = (size_t)prng_lg_range_u64(&stack_value, lg_range); diff --git a/src/tsd.c b/src/tsd.c index 6c90ade..5053f12 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -230,7 +230,7 @@ tsd_data_init(tsd_t *tsd) { * cost of test repeatability. For debug builds, instead use a * deterministic seed. */ - *tsd_offset_statep_get(tsd) = config_debug ? 0 : + *tsd_prng_statep_get(tsd) = config_debug ? 
0 : (uint64_t)(uintptr_t)tsd; tsd_thread_event_init(tsd); -- cgit v0.12 From da50d8ce87cb21963596825ebc5faf6d8abd4d2c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 17:22:25 -0800 Subject: Refactor and optimize prof sampling initialization. Makes the prof sample prng use the tsd prng_state. This allows us to properly initialize the sample interval event, without having to create tdata. As a result, tdata will be created on demand (when a thread reaches the sample interval bytes allocated), instead of on the first allocation. --- include/jemalloc/internal/prof_externs.h | 4 +-- include/jemalloc/internal/prof_inlines_b.h | 45 ++---------------------------- include/jemalloc/internal/prof_structs.h | 3 -- src/prof.c | 13 ++++----- src/prof_data.c | 7 +++-- src/thread_event.c | 2 +- src/tsd.c | 1 + 7 files changed, 16 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 94fbd75..fd18ac4 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_sample_threshold_update(tsd_t *tsd); void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); @@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active); + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index b4e65c0..388537e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,9 +82,7 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { - prof_tdata_t *tdata; - + prof_tdata_t **tdata_out) { cassert(config_prof); /* Fastpath: no need to load tdata */ @@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - bool booted = prof_tdata_get(tsd, false); - tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; } @@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (!booted) { - /* - * If this was the first creation of tdata, then it means that - * the previous thread_event() relied on the wrong prof_sample - * wait time, and that it should have relied on the new - * prof_sample wait time just set by prof_tdata_get(), so we - * now manually check again. - * - * If the check fails, then even though we relied on the wrong - * prof_sample wait time, we're now actually in perfect shape, - * in the sense that we can pretend that we have used the right - * prof_sample wait time. 
- * - * If the check succeeds, then we are now in a tougher - * situation, in the sense that we cannot pretend that we have - * used the right prof_sample wait time. A straightforward - * solution would be to fully roll back thread_event(), set the - * right prof_sample wait time, and then redo thread_event(). - * A simpler way, which is implemented below, is to just set a - * new prof_sample wait time that is usize less, and do nothing - * else. Strictly speaking, the thread event handler may end - * up in a wrong state, since it has still recorded an event - * whereas in reality there may be no event. However, the - * difference in the wait time offsets the wrongly recorded - * event, so that, functionally, the countdown to the next - * event will behave exactly as if we have used the right - * prof_sample wait time in the first place. - */ - uint64_t wait = prof_sample_event_wait_get(tsd); - assert(wait > 0); - if (usize < wait) { - thread_prof_sample_event_update(tsd, wait - usize); - return true; - } - } - /* Compute new sample threshold. */ if (update) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } return !tdata->active; } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 34ed482..9a00a18 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -167,9 +167,6 @@ struct prof_tdata_s { */ ckh_t bt2tctx; - /* Sampling state. */ - uint64_t prng_state; - /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; diff --git a/src/prof.c b/src/prof.c index 5360662..0590482 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { */ tdata = prof_tdata_get(tsd, true); if (tdata != NULL) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } } @@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) { * -mno-sse) in order for the workaround to be complete. 
*/ void -prof_sample_threshold_update(prof_tdata_t *tdata) { +prof_sample_threshold_update(tsd_t *tsd) { #ifdef JEMALLOC_PROF if (!config_prof) { return; } - tsd_t *tsd = tsd_fetch(); - if (lg_prof_sample == 0) { thread_prof_sample_event_update(tsd, THREAD_EVENT_MIN_START_WAIT); @@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); + uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); double u = (double)r * (1.0/9007199254740992.0L); uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; thread_prof_sample_event_update(tsd, bytes_until_sample); - #endif } @@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); } static char * @@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { prof_tdata_detach(tsd, tdata); return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active); + active, true); } void diff --git a/src/prof_data.c b/src/prof_data.c index cd92ee6..2f8bd2d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { + char *thread_name, bool active, bool reset_interval) { assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata; @@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); + if (reset_interval) { + prof_sample_threshold_update(tsd); + } tdata->enq = false; tdata->enq_idump = false; diff --git a/src/thread_event.c b/src/thread_event.c index f27a37a..9f6c927 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) { static void tsd_thread_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); - /* Do not set sample interval until the first allocation. */ + prof_sample_threshold_update(tsd); } static void diff --git a/src/tsd.c b/src/tsd.c index 5053f12..6e0ee93 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,6 +233,7 @@ tsd_data_init(tsd_t *tsd) { *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; + /* event_init may use the prng state above. */ tsd_thread_event_init(tsd); return tsd_tcache_enabled_data_init(tsd); -- cgit v0.12 From 9c59abe42afd044b742bd5c2ec8c1e01a4a8c1ca Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Nov 2019 12:13:48 -0800 Subject: Fix a typo in Makefile. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 7eba774..0bbf106 100644 --- a/Makefile.in +++ b/Makefile.in @@ -418,7 +418,7 @@ $(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS) $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ %.h: %.gen.h - @if ! `cmp -s $< $@` ; then echo "cp $< $<"; cp $< $@ ; fi + @if ! 
`cmp -s $< $@` ; then echo "cp $< $@"; cp $< $@ ; fi $(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): @mkdir -p $(@D) -- cgit v0.12 From 836d7a7e69011321ba75620279a31d43a05bf0d6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 18:24:39 -0800 Subject: Check for large size first in the uncommon case of malloc. Larger sizes are not that uncommon compared to !tsd_fast. --- src/jemalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1073512..239494d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2344,12 +2344,10 @@ je_malloc(size_t size) { } tsd_t *tsd = tsd_get(false); - if (unlikely(!tsd || !tsd_fast(tsd) || (size > SC_LOOKUP_MAXCLASS))) { + if (unlikely((size > SC_LOOKUP_MAXCLASS) || !tsd || !tsd_fast(tsd))) { return malloc_default(size); } - tcache_t *tcache = tsd_tcachep_get(tsd); - szind_t ind = sz_size2index_lookup(size); /* * The thread_allocated counter in tsd serves as a general purpose @@ -2373,6 +2371,7 @@ je_malloc(size_t size) { return malloc_default(size); } + tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); -- cgit v0.12 From c462753cc8e1d70318b6fcc4ffa0b8498588205c Mon Sep 17 00:00:00 2001 From: Leonardo Santagada Date: Wed, 23 Oct 2019 15:00:49 +0200 Subject: Use __forceinline for JEMALLOC_ALWAYS_INLINE on msvc --- include/jemalloc/internal/jemalloc_internal_macros.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index ece3b87..e97b5f9 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -4,7 +4,11 @@ #ifdef JEMALLOC_DEBUG # define JEMALLOC_ALWAYS_INLINE static inline #else -# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +# ifdef _MSC_VER +# define JEMALLOC_ALWAYS_INLINE static __forceinline +# else +# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +# endif #endif #ifdef _MSC_VER # define inline _inline -- cgit v0.12 From e4c36a6f30d5b393f05daa2850e2c03406c5c4c2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Nov 2019 23:44:01 -0800 Subject: Emphasize no modification through thread.allocatedp allowed. --- doc/jemalloc.xml.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 77afb00..76edab8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1554,7 +1554,8 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl*() calls. Note that the underlying counter + should not be modified by the application. @@ -1581,7 +1582,8 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl*() calls. Note that the underlying counter + should not be modified by the application. -- cgit v0.12 From 3b5eecf102dcc3eb9a4a50346cdfa96917683e0a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 15 Nov 2019 11:43:25 -0800 Subject: Fix bug in prof_realloc We should pass in `old_ptr` rather than the new `ptr` to `prof_free_sampled_object()` when `old_ptr` points to a sampled allocation.
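To make the scenario concrete, here is a minimal usage sketch (illustrative only, not a test shipped with this patch; it uses only the public API and assumes a build with profiling enabled, run with MALLOC_CONF="prof:true"):

```
/*
 * Illustrative only: a sampled allocation that rallocx() moves.  The
 * free-side sample bookkeeping must describe the old allocation
 * (old_ptr/old_usize/old_tctx), not the new one.
 */
#include <jemalloc/jemalloc.h>

int
main(void) {
	void *p = mallocx(1024, 0);               /* may be chosen for sampling */
	void *q = rallocx(p, 8 * 1024 * 1024, 0); /* large growth; likely moves */
	dallocx(q, 0);
	return 0;
}
```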
--- include/jemalloc/internal/prof_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 388537e..3465397 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -203,7 +203,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, old_ptr, old_usize, old_tctx); } } -- cgit v0.12 From 73510dfd150d0c28d48b15f28f8329a108c53af0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 15 Nov 2019 14:20:31 -0800 Subject: Revert "Fix bug in prof_realloc" This reverts commit 3b5eecf102dcc3eb9a4a50346cdfa96917683e0a. --- include/jemalloc/internal/prof_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 3465397..388537e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -203,7 +203,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); } } -- cgit v0.12 From 04cb7d4d6b8cd2fb1c615aeb049e00a51c66083e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 6 Nov 2019 23:09:20 -0800 Subject: Bail out early for muzzy decay. This avoids taking the muzzy decay mutex with the default setting. --- src/arena.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index fa18d14..a8cfcee 100644 --- a/src/arena.c +++ b/src/arena.c @@ -919,7 +919,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); - if (decay->purging) { + if (decay->purging || npages_decay_max == 0) { return; } decay->purging = true; @@ -988,6 +988,10 @@ arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (eset_npages_get(&arena->eset_muzzy) == 0 && + arena_muzzy_decay_ms_get(arena) <= 0) { + return false; + } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, &arena->eset_muzzy, is_background_thread, all); } -- cgit v0.12 From a787d2f5b35f8a28738e19efeea626c2a3999104 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Nov 2019 13:05:43 -0800 Subject: Prefer getaffinity() to detect number of CPUs. --- src/jemalloc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 239494d..0e379d4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -742,18 +742,28 @@ malloc_ncpus(void) { SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) +#elif defined(CPU_COUNT) /* * glibc >= 2.6 has the CPU_COUNT macro. * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. + * + * The getaffinity approach is also preferred when only a subset of CPUs + * is available, to avoid using more arenas than necessary. 
*/ { +# if defined(__FreeBSD__) + cpuset_t set; +# else cpu_set_t set; - +# endif +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + sched_getaffinity(0, sizeof(set), &set); +# else pthread_getaffinity_np(pthread_self(), sizeof(set), &set); +# endif result = CPU_COUNT(&set); } #else -- cgit v0.12 From 7160617107af5f566902ea3d1281b3a3c3cb6eea Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Nov 2019 22:47:49 -0800 Subject: Add branch hints to free_fastpath. Explicityly mark the non-slab case unlikely. Previously there were jumps in the common case. --- src/jemalloc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0e379d4..a5d6677 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2647,8 +2647,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - tcache_t *tcache = tsd_tcachep_get(tsd); - alloc_ctx_t alloc_ctx; /* * If !config_cache_oblivious, we can check PAGE alignment to @@ -2658,27 +2656,29 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { */ if (!size_hint || config_cache_oblivious) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, - rtree_ctx, (uintptr_t)ptr, - &alloc_ctx.szind, &alloc_ctx.slab); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), + &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, + &alloc_ctx.slab); /* Note: profiled objects will have alloc_ctx.slab set */ - if (!res || !alloc_ctx.slab) { + if (unlikely(!res || !alloc_ctx.slab)) { return false; } assert(alloc_ctx.szind != SC_NSIZES); } else { /* - * Check for both sizes that are too large, and for sampled objects. - * Sampled objects are always page-aligned. The sampled object check - * will also check for null ptr. + * Check for both sizes that are too large, and for sampled + * objects. Sampled objects are always page-aligned. The + * sampled object check will also check for null ptr. */ - if (size > SC_LOOKUP_MAXCLASS || (((uintptr_t)ptr & PAGE_MASK) == 0)) { + if (unlikely(size > SC_LOOKUP_MAXCLASS || + (((uintptr_t)ptr & PAGE_MASK) == 0))) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); } + tcache_t *tcache = tsd_tcachep_get(tsd); if (unlikely(ticker_trytick(&tcache->gc_ticker))) { return false; } @@ -3532,7 +3532,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); - if (flags !=0 || !free_fastpath(ptr, size, true)) { + if (flags != 0 || !free_fastpath(ptr, size, true)) { sdallocx_default(ptr, size, flags); } -- cgit v0.12 From cb1a1f4adadc85366e51afcf1a53b359828fba67 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Nov 2019 22:54:15 -0800 Subject: Remove the unnecessary alloc_ctx on free_fastpath. --- src/jemalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a5d6677..e8ac2fc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2647,7 +2647,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - alloc_ctx_t alloc_ctx; + szind_t szind; /* * If !config_cache_oblivious, we can check PAGE alignment to * detect sampled objects. Otherwise addresses are @@ -2655,16 +2655,16 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * See also isfree(). 
*/ if (!size_hint || config_cache_oblivious) { + bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, - &alloc_ctx.slab); + &extents_rtree, rtree_ctx, (uintptr_t)ptr, &szind, &slab); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !alloc_ctx.slab)) { + if (unlikely(!res || !slab)) { return false; } - assert(alloc_ctx.szind != SC_NSIZES); + assert(szind != SC_NSIZES); } else { /* * Check for both sizes that are too large, and for sampled @@ -2675,7 +2675,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { (((uintptr_t)ptr & PAGE_MASK) == 0))) { return false; } - alloc_ctx.szind = sz_size2index_lookup(size); + szind = sz_size2index_lookup(size); } tcache_t *tcache = tsd_tcachep_get(tsd); @@ -2683,12 +2683,12 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_t *bin = tcache_small_bin_get(tcache, szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = sz_index2size(szind); *tsd_thread_deallocatedp_get(tsd) += usize; return true; -- cgit v0.12 From 9a7ae3c97fd4753981d3a14a4b6a72b2d2a83f44 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 5 Nov 2019 20:43:59 -0800 Subject: Reduce footprint of bin_t. Avoid storing mutex_prof_data_t in bin_t. Added bin_stats_data_t which is used for reporting bin stats. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/bin.h | 23 +++++----- include/jemalloc/internal/bin_stats.h | 5 ++- include/jemalloc/internal/ctl.h | 2 +- src/arena.c | 2 +- src/ctl.c | 75 ++++++++++++++----------------- 6 files changed, 53 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a71f944..5178e23 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -25,7 +25,7 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats, + bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 70250a4..0d6aff8 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -61,19 +61,20 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. 
*/ static inline void -bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { +bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock); - dst_bin_stats->nmalloc += bin->stats.nmalloc; - dst_bin_stats->ndalloc += bin->stats.ndalloc; - dst_bin_stats->nrequests += bin->stats.nrequests; - dst_bin_stats->curregs += bin->stats.curregs; - dst_bin_stats->nfills += bin->stats.nfills; - dst_bin_stats->nflushes += bin->stats.nflushes; - dst_bin_stats->nslabs += bin->stats.nslabs; - dst_bin_stats->reslabs += bin->stats.reslabs; - dst_bin_stats->curslabs += bin->stats.curslabs; - dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs; + bin_stats_t *stats = &dst_bin_stats->stats_data; + stats->nmalloc += bin->stats.nmalloc; + stats->ndalloc += bin->stats.ndalloc; + stats->nrequests += bin->stats.nrequests; + stats->curregs += bin->stats.curregs; + stats->nfills += bin->stats.nfills; + stats->nflushes += bin->stats.nflushes; + stats->nslabs += bin->stats.nslabs; + stats->reslabs += bin->stats.reslabs; + stats->curslabs += bin->stats.curslabs; + stats->nonfull_slabs += bin->stats.nonfull_slabs; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index d04519c..0b99297 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -47,8 +47,11 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. */ size_t nonfull_slabs; +}; +typedef struct bin_stats_data_s bin_stats_data_t; +struct bin_stats_data_s { + bin_stats_t stats_data; mutex_prof_data_t mutex_data; }; - #endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 8ddf7f8..55a8ff4 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -42,7 +42,7 @@ typedef struct ctl_arena_stats_s { uint64_t nfills_small; uint64_t nflushes_small; - bin_stats_t bstats[SC_NBINS]; + bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; arena_stats_extents_t estats[SC_NPSIZES]; } ctl_arena_stats_t; diff --git a/src/arena.c b/src/arena.c index a8cfcee..f6e9402 100644 --- a/src/arena.c +++ b/src/arena.c @@ -83,7 +83,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats, + bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats) { cassert(config_stats); diff --git a/src/ctl.c b/src/ctl.c index abb82b5..e2cdc29 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -789,7 +789,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nfills_small = 0; ctl_arena->astats->nflushes_small = 0; memset(ctl_arena->astats->bstats, 0, SC_NBINS * - sizeof(bin_stats_t)); + sizeof(bin_stats_data_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * @@ -810,19 +810,15 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->lstats, ctl_arena->astats->estats); for (i = 0; i < SC_NBINS; i++) { - ctl_arena->astats->allocated_small += - ctl_arena->astats->bstats[i].curregs * + bin_stats_t *bstats = + 
&ctl_arena->astats->bstats[i].stats_data; + ctl_arena->astats->allocated_small += bstats->curregs * sz_index2size(i); - ctl_arena->astats->nmalloc_small += - ctl_arena->astats->bstats[i].nmalloc; - ctl_arena->astats->ndalloc_small += - ctl_arena->astats->bstats[i].ndalloc; - ctl_arena->astats->nrequests_small += - ctl_arena->astats->bstats[i].nrequests; - ctl_arena->astats->nfills_small += - ctl_arena->astats->bstats[i].nfills; - ctl_arena->astats->nflushes_small += - ctl_arena->astats->bstats[i].nflushes; + ctl_arena->astats->nmalloc_small += bstats->nmalloc; + ctl_arena->astats->ndalloc_small += bstats->ndalloc; + ctl_arena->astats->nrequests_small += bstats->nrequests; + ctl_arena->astats->nfills_small += bstats->nfills; + ctl_arena->astats->nflushes_small += bstats->nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -935,29 +931,26 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge bin stats. */ for (i = 0; i < SC_NBINS; i++) { - sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sdstats->bstats[i].nrequests += - astats->bstats[i].nrequests; + bin_stats_t *bstats = &astats->bstats[i].stats_data; + bin_stats_t *merged = &sdstats->bstats[i].stats_data; + merged->nmalloc += bstats->nmalloc; + merged->ndalloc += bstats->ndalloc; + merged->nrequests += bstats->nrequests; if (!destroyed) { - sdstats->bstats[i].curregs += - astats->bstats[i].curregs; + merged->curregs += bstats->curregs; } else { - assert(astats->bstats[i].curregs == 0); + assert(bstats->curregs == 0); } - sdstats->bstats[i].nfills += astats->bstats[i].nfills; - sdstats->bstats[i].nflushes += - astats->bstats[i].nflushes; - sdstats->bstats[i].nslabs += astats->bstats[i].nslabs; - sdstats->bstats[i].reslabs += astats->bstats[i].reslabs; + merged->nfills += bstats->nfills; + merged->nflushes += bstats->nflushes; + merged->nslabs += bstats->nslabs; + merged->reslabs += bstats->reslabs; if (!destroyed) { - sdstats->bstats[i].curslabs += - astats->bstats[i].curslabs; - sdstats->bstats[i].nonfull_slabs += - astats->bstats[i].nonfull_slabs; + merged->curslabs += bstats->curslabs; + merged->nonfull_slabs += bstats->nonfull_slabs; } else { - assert(astats->bstats[i].curslabs == 0); - assert(astats->bstats[i].nonfull_slabs == 0); + assert(bstats->curslabs == 0); + assert(bstats->nonfull_slabs == 0); } malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -3035,25 +3028,25 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, - arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, - arenas_i(mib[2])->astats->bstats[mib[4]].ndalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, - arenas_i(mib[2])->astats->bstats[mib[4]].nrequests, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, - arenas_i(mib[2])->astats->bstats[mib[4]].curregs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curregs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nfills, - arenas_i(mib[2])->astats->bstats[mib[4]].nfills, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nfills, uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_bins_j_nflushes, - arenas_i(mib[2])->astats->bstats[mib[4]].nflushes, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nflushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, - arenas_i(mib[2])->astats->bstats[mib[4]].nslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, - arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, - arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, - arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, -- cgit v0.12 From 9a3c73800991d3508516208127994a1fc3837de5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 5 Nov 2019 13:22:54 -0800 Subject: Refactor arena_bin_malloc_hard(). --- src/arena.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/arena.c b/src/arena.c index f6e9402..5537e66 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1311,21 +1311,21 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, static void * arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, unsigned binshard) { - const bin_info_t *bin_info; - extent_t *slab; - bin_info = &bin_infos[binind]; - if (!arena_is_auto(arena) && bin->slabcur != NULL) { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; + if (bin->slabcur != NULL) { + /* Only attempted when current slab is full. */ + assert(extent_nfree_get(bin->slabcur) == 0); } - slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); + + const bin_info_t *bin_info = &bin_infos[binind]; + extent_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, + binshard); if (bin->slabcur != NULL) { - /* - * Another thread updated slabcur while this one ran without the - * bin lock in arena_bin_nonfull_slab_get(). - */ if (extent_nfree_get(bin->slabcur) > 0) { + /* + * Another thread updated slabcur while this one ran + * without the bin lock in arena_bin_nonfull_slab_get(). + */ void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { @@ -1357,7 +1357,6 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, return NULL; } bin->slabcur = slab; - assert(extent_nfree_get(bin->slabcur) > 0); return arena_slab_reg_alloc(slab, bin_info); -- cgit v0.12 From 8b2c2a596da9bed11432ac703a6c0b0a76ec4dfd Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Sat, 26 Oct 2019 23:28:42 -0700 Subject: Support C++17 over-aligned allocation Summary: Add support for C++17 over-aligned allocation: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0035r4.html Supporting all 10 operators means we avoid thunking thru libstdc++-v3/libsupc++ and just call jemalloc directly. 
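As a rough illustration of what calling jemalloc directly amounts to (this sketch is not part of the patch and sticks to the public C API; the operator bodies added here use the internal je_* entry points instead), an over-aligned allocation and its sized, aligned release map onto mallocx/sdallocx with MALLOCX_ALIGN:

```
/* Illustrative only: public-API equivalent of an over-aligned allocation
 * and its sized, aligned release. */
#include <stddef.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	size_t sz = 1024, align = 64;
	void *p = mallocx(sz, MALLOCX_ALIGN(align));   /* over-aligned allocation */
	if (p != NULL) {
		sdallocx(p, sz, MALLOCX_ALIGN(align)); /* sized + aligned dealloc */
	}
	return 0;
}
```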
It's also worth noting that there is now an aligned *and sized* operator delete: ``` void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; ``` If JeMalloc did not provide this, the default implementation would ignore the size parameter entirely: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/libsupc%2B%2B/del_opsa.cc#L30-L33 (I must also update ax_cxx_compile_stdcxx.m4 to a newer version with C++17 support.) Test Plan: Wrote a simple test that allocates and then deletes an over-aligned type: ``` struct alignas(32) Foo {}; Foo *f; int main() { f = new Foo; delete f; } ``` Before this change, both new and delete go thru PLT, and we end up calling regular old free: ``` (gdb) disassemble Dump of assembler code for function main(): ... 0x00000000004029b7 <+55>: call 0x4022d0 <_ZnwmSt11align_val_t@plt> ... 0x00000000004029d5 <+85>: call 0x4022e0 <_ZdlPvmSt11align_val_t@plt> ... (gdb) s free (ptr=0x7ffff6408020) at /home/engshare/third-party2/jemalloc/master/src/jemalloc.git-trunk/src/jemalloc.c:2842 2842 if (!free_fastpath(ptr, 0, false)) { ``` After this change, we directly call new/delete and ultimately call sdallocx: ``` (gdb) disassemble Dump of assembler code for function main(): ... 0x0000000000402b77 <+55>: call 0x496ca0 ... 0x0000000000402b95 <+85>: call 0x496e60 ... (gdb) s 116 je_sdallocx_noflags(ptr, size); ``` --- configure.ac | 7 +- .../jemalloc/internal/jemalloc_internal_decls.h | 1 + m4/ax_cxx_compile_stdcxx.m4 | 449 +++++++++++++++++++-- src/jemalloc_cpp.cpp | 121 +++++- 4 files changed, 536 insertions(+), 42 deletions(-) diff --git a/configure.ac b/configure.ac index c3f53f7..5e56e16 100644 --- a/configure.ac +++ b/configure.ac @@ -290,8 +290,11 @@ if test "x$enable_cxx" = "x1" ; then dnl Require at least c++14, which is the first version to support sized dnl deallocation. C++ support is not compiled otherwise. 
m4_include([m4/ax_cxx_compile_stdcxx.m4]) - AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) - if test "x${HAVE_CXX14}" = "x1" ; then + AX_CXX_COMPILE_STDCXX([17], [noext], [optional]) + if test "x${HAVE_CXX17}" != "x1"; then + AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) + fi + if test "x${HAVE_CXX14}" = "x1" -o "x${HAVE_CXX17}" = "x1"; then JE_CXXFLAGS_ADD([-Wall]) JE_CXXFLAGS_ADD([-Wextra]) JE_CXXFLAGS_ADD([-g3]) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7d6053e..042a1fa 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -5,6 +5,7 @@ #ifdef _WIN32 # include # include "msvc_compat/windows_extra.h" +# include "msvc_compat/strings.h" # ifdef _WIN64 # if LG_VADDR <= 32 # error Generate the headers using x64 vcargs diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 index 2c18e49..43087b2 100644 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html # =========================================================================== # # SYNOPSIS @@ -33,21 +33,23 @@ # Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov # Copyright (c) 2015 Paul Norman # Copyright (c) 2015 Moritz Klammler +# Copyright (c) 2016, 2018 Krzesimir Nowak +# Copyright (c) 2019 Enji Cooper # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. -#serial 4 +#serial 11 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). 
AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl - m4_if([$1], [11], [], - [$1], [14], [], - [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], + m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], + [$1], [14], [ax_cxx_compile_alternatives="14 1y"], + [$1], [17], [ax_cxx_compile_alternatives="17 1z"], [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$2], [], [], [$2], [ext], [], @@ -59,18 +61,11 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) AC_LANG_PUSH([C++])dnl ac_success=no - AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, - ax_cv_cxx_compile_cxx$1, - [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [ax_cv_cxx_compile_cxx$1=yes], - [ax_cv_cxx_compile_cxx$1=no])]) - if test x$ax_cv_cxx_compile_cxx$1 = xyes; then - ac_success=yes - fi m4_if([$2], [noext], [], [dnl if test x$ac_success = xno; then - for switch in -std=gnu++$1 -std=gnu++0x; do + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, $cachevar, @@ -96,22 +91,27 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl dnl HP's aCC needs +std=c++11 according to: dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf dnl Cray's crayCC needs "-h std=c++11" - for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" + for alternative in ${ax_cxx_compile_alternatives}; do + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break fi - ac_success=yes + done + if test x$ac_success = xyes; then break fi done @@ -148,6 +148,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 ) +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 +) dnl Tests for new features in C++11 @@ -185,11 +190,13 @@ namespace cxx11 struct Base { + virtual ~Base() {} virtual void f() {} }; struct Derived : public Base { + virtual ~Derived() override {} virtual void f() override {} }; @@ -518,7 +525,7 @@ namespace cxx14 } - namespace test_digit_seperators + namespace test_digit_separators { constexpr auto ten_million = 100'000'000; @@ -560,3 +567,385 @@ namespace cxx14 #endif // __cplusplus >= 201402L ]]) + + +dnl Tests for new features in C++17 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ + 
+// If the compiler admits that it is not ready for C++17, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201703L + +#error "This is not a C++17 compiler" + +#else + +#include +#include +#include + +namespace cxx17 +{ + + namespace test_constexpr_lambdas + { + + constexpr int foo = [](){return 42;}(); + + } + + namespace test::nested_namespace::definitions + { + + } + + namespace test_fold_expression + { + + template + int multiply(Args... args) + { + return (args * ... * 1); + } + + template + bool all(Args... args) + { + return (args && ...); + } + + } + + namespace test_extended_static_assert + { + + static_assert (true); + + } + + namespace test_auto_brace_init_list + { + + auto foo = {5}; + auto bar {5}; + + static_assert(std::is_same, decltype(foo)>::value); + static_assert(std::is_same::value); + } + + namespace test_typename_in_template_template_parameter + { + + template typename X> struct D; + + } + + namespace test_fallthrough_nodiscard_maybe_unused_attributes + { + + int f1() + { + return 42; + } + + [[nodiscard]] int f2() + { + [[maybe_unused]] auto unused = f1(); + + switch (f1()) + { + case 17: + f1(); + [[fallthrough]]; + case 42: + f1(); + } + return f1(); + } + + } + + namespace test_extended_aggregate_initialization + { + + struct base1 + { + int b1, b2 = 42; + }; + + struct base2 + { + base2() { + b3 = 42; + } + int b3; + }; + + struct derived : base1, base2 + { + int d; + }; + + derived d1 {{1, 2}, {}, 4}; // full initialization + derived d2 {{}, {}, 4}; // value-initialized bases + + } + + namespace test_general_range_based_for_loop + { + + struct iter + { + int i; + + int& operator* () + { + return i; + } + + const int& operator* () const + { + return i; + } + + iter& operator++() + { + ++i; + return *this; + } + }; + + struct sentinel + { + int i; + }; + + bool operator== (const iter& i, const sentinel& s) + { + return i.i == s.i; + } + + bool operator!= (const iter& i, const sentinel& s) + { + return !(i == s); + } + + struct range + { + iter begin() const + { + return {0}; + } + + sentinel end() const + { + return {5}; + } + }; + + void f() + { + range r {}; + + for (auto i : r) + { + [[maybe_unused]] auto v = i; + } + } + + } + + namespace test_lambda_capture_asterisk_this_by_value + { + + struct t + { + int i; + int foo() + { + return [*this]() + { + return i; + }(); + } + }; + + } + + namespace test_enum_class_construction + { + + enum class byte : unsigned char + {}; + + byte foo {42}; + + } + + namespace test_constexpr_if + { + + template + int f () + { + if constexpr(cond) + { + return 13; + } + else + { + return 42; + } + } + + } + + namespace test_selection_statement_with_initializer + { + + int f() + { + return 13; + } + + int f2() + { + if (auto i = f(); i > 0) + { + return 3; + } + + switch (auto i = f(); i + 4) + { + case 17: + return 2; + + default: + return 1; + } + } + + } + + namespace test_template_argument_deduction_for_class_templates + { + + template + struct pair + { + pair (T1 p1, T2 p2) + : m1 {p1}, + m2 {p2} + {} + + T1 m1; + T2 m2; + }; + + void f() + { + [[maybe_unused]] auto p = pair{13, 42u}; + } + + } + + namespace test_non_type_auto_template_parameters + { + + template + struct B + {}; + + B<5> b1; + B<'a'> b2; + + } + + namespace test_structured_bindings + { + + int arr[2] = { 1, 2 }; + std::pair pr = { 1, 2 }; + + auto f1() -> int(&)[2] + { + return arr; + } + + auto f2() -> std::pair& + { + return pr; + } + + struct S + { + int x1 : 2; 
+ volatile double y1; + }; + + S f3() + { + return {}; + } + + auto [ x1, y1 ] = f1(); + auto& [ xr1, yr1 ] = f1(); + auto [ x2, y2 ] = f2(); + auto& [ xr2, yr2 ] = f2(); + const auto [ x3, y3 ] = f3(); + + } + + namespace test_exception_spec_type_system + { + + struct Good {}; + struct Bad {}; + + void g1() noexcept; + void g2(); + + template + Bad + f(T*, T*); + + template + Good + f(T1*, T2*); + + static_assert (std::is_same_v); + + } + + namespace test_inline_variables + { + + template void f(T) + {} + + template inline T g(T) + { + return T{}; + } + + template<> inline void f<>(int) + {} + + template<> int g<>(int) + { + return 5; + } + + } + +} // namespace cxx17 + +#endif // __cplusplus < 201703L + +]]) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index da0441a..f10970a 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -39,6 +39,20 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif +#if __cpp_aligned_new >= 201606 +/* C++17's over-aligned operators. */ +void *operator new(std::size_t size, std::align_val_t); +void *operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, std::align_val_t); +void *operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void* ptr, std::align_val_t) noexcept; +void operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; +void operator delete[](void* ptr, std::align_val_t) noexcept; +void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept; +#endif + JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { @@ -76,12 +90,46 @@ JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) + if (likely(ptr != nullptr)) { return ptr; + } return handleOOM(size, IsNoExcept); } +template +JEMALLOC_ALWAYS_INLINE +void * +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { + void *ptr = je_aligned_alloc(static_cast(alignment), size); + if (likely(ptr != nullptr)) { + return ptr; + } + + return handleOOM(size, IsNoExcept); +} + +JEMALLOC_ALWAYS_INLINE +void +sizedDeleteImpl(void* ptr, std::size_t size) noexcept { + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx_noflags(ptr, size); +} + +JEMALLOC_ALWAYS_INLINE +void +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + if (config_debug) { + assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); + } + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); +} + void * operator new(std::size_t size) { return newImpl(size); @@ -102,6 +150,30 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { return newImpl(size); } +#if __cpp_aligned_new >= 201606 + +void * +operator new(std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); +} + +void * +operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); +} + +void * +operator new[](std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); +} + +void * 
+operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); +} + +#endif // __cpp_aligned_new + void operator delete(void *ptr) noexcept { je_free(ptr); @@ -125,17 +197,46 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { void operator delete(void *ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); + sizedDeleteImpl(ptr, size); } -void operator delete[](void *ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); +void +operator delete[](void *ptr, std::size_t size) noexcept { + sizedDeleteImpl(ptr, size); } #endif // __cpp_sized_deallocation + +#if __cpp_aligned_new >= 201606 + +void +operator delete(void* ptr, std::align_val_t) noexcept { + je_free(ptr); +} + +void +operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + je_free(ptr); +} + +void +operator delete[](void* ptr, std::align_val_t) noexcept { + je_free(ptr); +} + +void +operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + je_free(ptr); +} + +void +operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + alignedSizedDeleteImpl(ptr, size, alignment); +} + +void +operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + alignedSizedDeleteImpl(ptr, size, alignment); +} + +#endif // __cpp_aligned_new -- cgit v0.12 From b55419f9b99ab416f035179593370401af8d213f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 19 Nov 2019 16:24:57 -0800 Subject: Restructure profiling Develop new data structure and code logic for holding profiling related information stored in the extent that may be needed after the extent is released, which in particular is the case for the reallocation code path (e.g. in `rallocx()` and `xallocx()`). The data structure is a generalization of `prof_tctx_t`: we previously only copy out the `prof_tctx` before the extent is released, but we may be in need of additional fields. Currently the only additional field is the allocation time field, but there may be more fields in the future. The restructuring also resolved a bug: `prof_realloc()` mistakenly passed the new `ptr` to `prof_free_sampled_object()`, but passing in the `old_ptr` would crash because it's already been released. Now the essential profiling information is collectively copied out early and safely passed to `prof_free_sampled_object()` after the extent is released. 
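A condensed sketch of the new carrier struct and the intended ordering follows (the stand-in typedefs exist only so the fragment compiles outside the tree; the authoritative definitions are in the diff below):

```
#include <stdint.h>

/* Stand-ins for jemalloc-internal types; illustration only. */
typedef struct prof_tctx_s prof_tctx_t;
typedef struct { uint64_t ns; } nstime_t;

/* Mirrors the prof_info_t introduced by this patch. */
typedef struct prof_info_s {
	/* Points to the prof_tctx_t corresponding to the allocation. */
	prof_tctx_t *prof_tctx;
	/* Time when the allocation was made. */
	nstime_t alloc_time;
} prof_info_t;

/*
 * Reallocation-path contract:
 *   1. prof_info_get(tsdn, old_ptr, alloc_ctx, &old_prof_info)   - copy out
 *   2. perform the reallocation (the old extent may be released)
 *   3. prof_free_sampled_object(tsd, old_usize, &old_prof_info)  - consume
 * After step 2 only the snapshot is consulted, never the old extent.
 */
```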
--- include/jemalloc/internal/arena_inlines_b.h | 42 +++++++++++++---------------- include/jemalloc/internal/extent.h | 15 +++++------ include/jemalloc/internal/large_externs.h | 5 ++-- include/jemalloc/internal/prof_externs.h | 5 ++-- include/jemalloc/internal/prof_inlines_b.h | 34 +++++++++++------------ include/jemalloc/internal/prof_structs.h | 7 +++++ include/jemalloc/internal/prof_types.h | 1 + src/jemalloc.c | 27 +++++++++---------- src/large.c | 12 +++------ src/prof.c | 9 ++++--- src/prof_log.c | 5 ++-- test/unit/prof_tctx.c | 10 +++---- 12 files changed, 81 insertions(+), 91 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7ac2f94..dd743ce 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -40,23 +40,31 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { return arena_choose(tsd, NULL); } -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); + + const extent_t *extent; + bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(!extent_slab_get(extent))) { - return large_prof_tctx_get(tsdn, extent); - } + extent = iealloc(tsdn, ptr); + is_slab = extent_slab_get(extent); + } else if (!unlikely(is_slab = alloc_ctx->slab)) { + extent = iealloc(tsdn, ptr); + } + + if (unlikely(!is_slab)) { + /* extent must have been initialized at this point. */ + large_prof_info_get(tsdn, extent, prof_info); } else { - if (unlikely(!alloc_ctx->slab)) { - return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); - } + memset(prof_info, 0, sizeof(prof_info_t)); + prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; } - return (prof_tctx_t *)(uintptr_t)1U; } JEMALLOC_ALWAYS_INLINE void @@ -89,20 +97,6 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { large_prof_tctx_reset(tsdn, extent); } -JEMALLOC_ALWAYS_INLINE nstime_t -arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - extent_t *extent = iealloc(tsdn, ptr); - /* - * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're - * sure we have a sampled allocation. 
- */ - assert(!extent_slab_get(extent)); - return large_prof_alloc_time_get(extent); -} - JEMALLOC_ALWAYS_INLINE void arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 92c34ae..c47beaf 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -333,15 +333,12 @@ extent_slab_data_get_const(const extent_t *extent) { return &extent->e_slab_data; } -static inline prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, - ATOMIC_ACQUIRE); -} - -static inline nstime_t -extent_prof_alloc_time_get(const extent_t *extent) { - return extent->e_alloc_time; +static inline void +extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p( + &extent->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = extent->e_alloc_time; } static inline void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index a05019e..9a1ff16 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,11 +22,10 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); - -nstime_t large_prof_alloc_time_get(const extent_t *extent); void large_prof_alloc_time_set(extent_t *extent, nstime_t time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index fd18ac4..47e47ba 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -51,8 +51,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); @@ -102,7 +101,7 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(tsd_t *tsd); -void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); bool prof_log_init(tsd_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 388537e..5acb4ca 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -39,12 +39,14 @@ prof_tdata_get(tsd_t *tsd, bool create) { return tdata; } 
-JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); - return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); + arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info); } JEMALLOC_ALWAYS_INLINE void @@ -64,14 +66,6 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { arena_prof_tctx_reset(tsdn, ptr, tctx); } -JEMALLOC_ALWAYS_INLINE nstime_t -prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - return arena_prof_alloc_time_get(tsdn, ptr); -} - JEMALLOC_ALWAYS_INLINE void prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); @@ -152,7 +146,7 @@ prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) { + prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -174,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -191,8 +185,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, */ prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); } else { - assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == - (uintptr_t)1U); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info); + assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); } /* @@ -203,19 +198,20 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, old_usize, old_prof_info); } } JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_free_sampled_object(tsd, ptr, usize, tctx); + if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) { + prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 9a00a18..17a5650 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -96,6 +96,13 @@ struct prof_tctx_s { }; typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; +struct prof_info_s { + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *prof_tctx; + /* Time when the allocation was made. */ + nstime_t alloc_time; +}; + struct prof_gctx_s { /* Protects nlimbo, cnt_summed, and tctxs. 
*/ malloc_mutex_t *lock; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index a50653b..7a34385 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -5,6 +5,7 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; diff --git a/src/jemalloc.c b/src/jemalloc.c index e8ac2fc..1770992 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3009,13 +3009,11 @@ JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); - tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx, hook_args); @@ -3040,7 +3038,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, *usize = isalloc(tsd_tsdn(tsd), p); } prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, - old_usize, old_tctx); + old_usize, &old_prof_info); return p; } @@ -3262,18 +3260,15 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { - size_t usize_max, usize; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. 
*/ + size_t usize_max; if (alignment == 0) { usize_max = sz_s2u(size+extra); assert(usize_max > 0 @@ -3292,8 +3287,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } } thread_event(tsd, usize_max); - tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); @@ -3318,7 +3315,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, return usize; } prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, - old_tctx); + &old_prof_info); return usize; } diff --git a/src/large.c b/src/large.c index 8aaa3ce..6eeb7f4 100644 --- a/src/large.c +++ b/src/large.c @@ -367,9 +367,10 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) { return extent_usize_get(extent); } -prof_tctx_t * -large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - return extent_prof_tctx_get(extent); +void +large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info) { + extent_prof_info_get(extent, prof_info); } void @@ -382,11 +383,6 @@ large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } -nstime_t -large_prof_alloc_time_get(const extent_t *extent) { - return extent_prof_alloc_time_get(extent); -} - void large_prof_alloc_time_set(extent_t *extent, nstime_t t) { extent_prof_alloc_time_set(extent, t); diff --git a/src/prof.c b/src/prof.c index 0590482..ccac3c0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -187,8 +187,11 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_tctx_t *tctx = prof_info->prof_tctx; + assert((uintptr_t)tctx > (uintptr_t)1U); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); @@ -196,7 +199,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - prof_try_log(tsd, ptr, usize, tctx); + prof_try_log(tsd, usize, prof_info); if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); diff --git a/src/prof_log.c b/src/prof_log.c index 73ca741..5747c8d 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -199,7 +199,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { } void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + prof_tctx_t *tctx = prof_info->prof_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); @@ -229,7 +230,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { log_tables_initialized = true; } - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr); + nstime_t alloc_time = prof_info->alloc_time; nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index ff3b2b0..30df71b 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; 
void *p, *q; - prof_tctx_t *tctx_p, *tctx_q; + prof_info_t prof_info_p, prof_info_q; uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; test_skip_if(!config_prof); @@ -15,8 +15,8 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tctx_p = prof_tctx_get(tsdn, p, NULL); - assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, p, NULL, &prof_info_p); + assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); assert_u64_eq(curobjs_0 + 1, curobjs_1, @@ -25,8 +25,8 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - tctx_q = prof_tctx_get(tsdn, q, NULL); - assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, q, NULL, &prof_info_q); + assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); assert_u64_eq(curobjs_1, curobjs_2, -- cgit v0.12 From 694537177851b52851b89bf59f1692d2b9e348aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 22 Nov 2019 11:42:01 -0800 Subject: Change tsdn to tsd for profiling code path --- include/jemalloc/internal/arena_inlines_b.h | 26 +++++++++++----------- include/jemalloc/internal/large_externs.h | 7 +++--- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 34 ++++++++++++++--------------- src/jemalloc.c | 6 ++--- src/large.c | 9 ++++---- src/prof.c | 10 ++++----- test/unit/prof_tctx.c | 8 +++---- 8 files changed, 50 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index dd743ce..6ec1a12 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -41,7 +41,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, +arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); @@ -52,15 +52,15 @@ arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - extent = iealloc(tsdn, ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); is_slab = extent_slab_get(extent); } else if (!unlikely(is_slab = alloc_ctx->slab)) { - extent = iealloc(tsdn, ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); } if (unlikely(!is_slab)) { /* extent must have been initialized at this point. */ - large_prof_info_get(tsdn, extent, prof_info); + large_prof_info_get(extent, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -68,41 +68,41 @@ arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. 
*/ if (alloc_ctx == NULL) { - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_set(tsdn, extent, tctx); + large_prof_tctx_set(extent, tctx); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx); + large_prof_tctx_set(iealloc(tsd_tsdn(tsd), ptr), tctx); } } } static inline void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); - large_prof_tctx_reset(tsdn, extent); + large_prof_tctx_reset(extent); } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { +arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); large_prof_alloc_time_set(extent, t); } diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 9a1ff16..85786bb 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,10 +22,9 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, - prof_info_t *prof_info); -void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); -void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); +void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); +void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(extent_t *extent); void large_prof_alloc_time_set(extent_t *extent, nstime_t time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 47e47ba..6e020be 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -49,7 +49,7 @@ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 5acb4ca..827476d 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -40,38 +40,38 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE void -prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, +prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); - arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info); + 
arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx); + arena_prof_tctx_set(tsd, ptr, usize, alloc_ctx, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, ptr, tctx); + arena_prof_tctx_reset(tsd, ptr, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { +prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - arena_prof_alloc_time_set(tsdn, ptr, t); + arena_prof_alloc_time_set(tsd, ptr, t); } JEMALLOC_ALWAYS_INLINE bool @@ -129,16 +129,16 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, +prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, ptr)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_malloc_sample_object(tsdn, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsdn, ptr, usize, alloc_ctx, + prof_tctx_set(tsd, ptr, usize, alloc_ctx, (prof_tctx_t *)(uintptr_t)1U); } } @@ -172,9 +172,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, moved = (ptr != old_ptr); if (unlikely(sampled)) { - prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL, + prof_tctx_set(tsd, ptr, usize, NULL, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* @@ -183,10 +183,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * to do here in the presence of explicit knowledge re: moved * state. 
*/ - prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); + prof_tctx_reset(tsd, ptr, tctx); } else { prof_info_t prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info); + prof_info_get(tsd, ptr, NULL, &prof_info); assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); } @@ -205,7 +205,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info); + prof_info_get(tsd, ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 1770992..13bf8d7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2171,7 +2171,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } - prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); + prof_malloc(tsd, allocation, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, @@ -3010,7 +3010,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; - prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info); + prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; @@ -3261,7 +3261,7 @@ JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { prof_info_t old_prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info); + prof_info_get(tsd, ptr, alloc_ctx, &old_prof_info); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in diff --git a/src/large.c b/src/large.c index 6eeb7f4..4d1257f 100644 --- a/src/large.c +++ b/src/large.c @@ -368,19 +368,18 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) { } void -large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, - prof_info_t *prof_info) { +large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { extent_prof_info_get(extent, prof_info); } void -large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { +large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { extent_prof_tctx_set(extent, tctx); } void -large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { - large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); +large_prof_tctx_reset(extent_t *extent) { + large_prof_tctx_set(extent, (prof_tctx_t *)(uintptr_t)1U); } void diff --git a/src/prof.c b/src/prof.c index ccac3c0..36945bd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -165,17 +165,17 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { } void -prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, ptr, usize, NULL, tctx); + prof_tctx_set(tsd, ptr, usize, NULL, tctx); /* Get the current time and set this in the extent_t. We'll read this * when free() is called. 
*/ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsdn, ptr, t); + prof_alloc_time_set(tsd, ptr, t); - malloc_mutex_lock(tsdn, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -183,7 +183,7 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tsdn, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } void diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 30df71b..4e77545 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" TEST_BEGIN(test_prof_realloc) { - tsdn_t *tsdn; + tsd_t *tsd; int flags; void *p, *q; prof_info_t prof_info_p, prof_info_q; @@ -9,13 +9,13 @@ TEST_BEGIN(test_prof_realloc) { test_skip_if(!config_prof); - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); flags = MALLOCX_TCACHE_NONE; prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - prof_info_get(tsdn, p, NULL, &prof_info_p); + prof_info_get(tsd, p, NULL, &prof_info_p); assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); @@ -25,7 +25,7 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - prof_info_get(tsdn, q, NULL, &prof_info_q); + prof_info_get(tsd, q, NULL, &prof_info_q); assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); -- cgit v0.12 From 5c47a3022775080866fd37d74c0143d7ffec3915 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 25 Nov 2019 15:27:52 -0800 Subject: Guard C++ aligned APIs --- src/jemalloc_cpp.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index f10970a..c2110a1 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -97,6 +97,7 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } +#if __cpp_aligned_new >= 201606 template JEMALLOC_ALWAYS_INLINE void * @@ -108,6 +109,7 @@ alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept return handleOOM(size, IsNoExcept); } +#endif // __cpp_aligned_new JEMALLOC_ALWAYS_INLINE void @@ -118,6 +120,7 @@ sizedDeleteImpl(void* ptr, std::size_t size) noexcept { je_sdallocx_noflags(ptr, size); } +#if __cpp_aligned_new >= 201606 JEMALLOC_ALWAYS_INLINE void alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { @@ -129,6 +132,7 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) } je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); } +#endif // __cpp_aligned_new void * operator new(std::size_t size) { -- cgit v0.12 From a70909b130ab37a0e87627122f1f637f08173431 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 3 Dec 2019 02:18:27 +0800 Subject: Test on all supported release of FreeBSD Keep 11.2 because 11.3 is temporarily not available for now. 
--- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index 019d2c3..a9de953 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,6 +5,7 @@ env: task: freebsd_instance: matrix: + image: freebsd-12-1-release-amd64 image: freebsd-12-0-release-amd64 image: freebsd-11-2-release-amd64 install_script: -- cgit v0.12 From 1b1e76acfe281e5b27a2ce0e28342cbc04c01b37 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 10:16:44 -0800 Subject: Disable some spuriously-triggering warnings --- configure.ac | 5 +++++ include/jemalloc/internal/tcache_types.h | 2 +- src/prof.c | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 5e56e16..6ccd009 100644 --- a/configure.ac +++ b/configure.ac @@ -250,6 +250,11 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) JE_CFLAGS_ADD([-Wno-format-zero-length]) + dnl This warning triggers on the use of the universal zero initializer, which + dnl is a very handy idiom for things like the tcache static initializer (which + dnl has lots of nested structs). See the discussion at. + dnl https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 + JE_CFLAGS_ADD([-Wno-missing-braces]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 9fd3926..c30a533 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -51,7 +51,7 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR_BYTES 65536U /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {{0}} +#define TCACHE_ZERO_INITIALIZER {0} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/src/prof.c b/src/prof.c index 36945bd..9c2357c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -113,6 +113,21 @@ bool prof_booted = false; /******************************************************************************/ +/* + * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up + * calling strncpy with a size of 0, which triggers a -Wstringop-truncation + * warning (strncpy can never actually be called in this case, since we bail out + * much earlier when config_prof is false). This function works around the + * warning to let us leave the warning on. 
+ */ +static inline void +prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { + cassert(config_prof); +#ifdef JEMALLOC_PROF + strncpy(dest, src, size); +#endif +} + static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); @@ -692,7 +707,7 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { } assert(prof_dump_prefix != NULL); - strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); -- cgit v0.12 From 5e0b090992ba4399b65c177cd30d56cc69c96646 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:15:36 -0800 Subject: No need to pass usize to prof_tctx_set() --- include/jemalloc/internal/arena_inlines_b.h | 4 ++-- include/jemalloc/internal/prof_inlines_b.h | 11 +++++------ src/prof.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6ec1a12..fb25c8f 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,8 +68,8 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 827476d..06689c8 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -50,12 +50,12 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { +prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, usize, alloc_ctx, tctx); + arena_prof_tctx_set(tsd, ptr, alloc_ctx, tctx); } JEMALLOC_ALWAYS_INLINE void @@ -138,7 +138,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsd, ptr, usize, alloc_ctx, + prof_tctx_set(tsd, ptr, alloc_ctx, (prof_tctx_t *)(uintptr_t)1U); } } @@ -174,8 +174,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd, ptr, usize, NULL, - (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsd, ptr, NULL, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* * prof_tctx_set() would work for the !moved case as well, but diff --git a/src/prof.c b/src/prof.c index 9c2357c..d0c06a8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -182,7 +182,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, usize, NULL, tctx); + prof_tctx_set(tsd, ptr, NULL, tctx); /* Get the current time and set this in the extent_t. 
We'll read this * when free() is called. */ -- cgit v0.12 From aa1d71fb7ab34ce96743753f08a761747b5449c8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:35:12 -0800 Subject: Rename prof_tctx to alloc_tctx in prof_info_t --- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 6 +++--- include/jemalloc/internal/prof_structs.h | 2 +- src/prof.c | 2 +- src/prof_log.c | 2 +- test/unit/prof_tctx.c | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fb25c8f..930daba 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -63,7 +63,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, large_prof_info_get(extent, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); - prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; } } diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index c47beaf..3a20540 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -336,7 +336,7 @@ extent_slab_data_get_const(const extent_t *extent) { static inline void extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { assert(prof_info != NULL); - prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p( + prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( &extent->e_prof_tctx, ATOMIC_ACQUIRE); prof_info->alloc_time = extent->e_alloc_time; } diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 06689c8..3c0594e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -168,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -186,7 +186,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); + assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); } /* @@ -209,7 +209,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) { + if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 17a5650..6223adc 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -98,7 +98,7 @@ typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *prof_tctx; + prof_tctx_t *alloc_tctx; /* Time when the allocation was made. 
*/ nstime_t alloc_time; }; diff --git a/src/prof.c b/src/prof.c index d0c06a8..3be461b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -204,7 +204,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { assert(prof_info != NULL); - prof_tctx_t *tctx = prof_info->prof_tctx; + prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/prof_log.c b/src/prof_log.c index 5747c8d..b587934 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -200,7 +200,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { - prof_tctx_t *tctx = prof_info->prof_tctx; + prof_tctx_t *tctx = prof_info->alloc_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4e77545..4dde0ab 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -16,7 +16,7 @@ TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, + assert_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); assert_u64_eq(curobjs_0 + 1, curobjs_1, @@ -26,7 +26,7 @@ TEST_BEGIN(test_prof_realloc) { assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, + assert_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); assert_u64_eq(curobjs_1, curobjs_2, -- cgit v0.12 From dfdd46f6c1e136b57cc943a8569f7f95312f88c6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:52:54 -0800 Subject: Refactor prof_tctx_t creation --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 23 +++++------------------ src/prof_data.c | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6e020be..86f4193 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -54,7 +54,7 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); size_t prof_bt_count(void); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 3c0594e..2aebb3d 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -75,8 +75,7 @@ prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { 
cassert(config_prof); /* Fastpath: no need to load tdata */ @@ -90,14 +89,6 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { - tdata = NULL; - } - - if (tdata_out != NULL) { - *tdata_out = tdata; - } - - if (unlikely(tdata == NULL)) { return true; } @@ -111,18 +102,14 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; assert(usize == sz_s2u(usize)); - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) { + if (!prof_active || + likely(prof_sample_accum_update(tsd, usize, update))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { - bt_init(&bt, tdata->vec); - prof_backtrace(tsd, &bt); - ret = prof_lookup(tsd, &bt); + ret = prof_tctx_create(tsd); } return ret; @@ -154,7 +141,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { + if (prof_sample_accum_update(tsd, usize, true)) { /* * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a diff --git a/src/prof_data.c b/src/prof_data.c index 2f8bd2d..1b32152 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -300,7 +300,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return false; } -prof_tctx_t * +static prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; @@ -312,9 +312,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } + assert(tdata != NULL); malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); @@ -374,6 +372,16 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { return ret.p; } +prof_tctx_t * +prof_tctx_create(tsd_t *tsd) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + assert(tdata != NULL); + prof_bt_t bt; + bt_init(&bt, tdata->vec); + prof_backtrace(tsd, &bt); + return prof_lookup(tsd, &bt); +} + #ifdef JEMALLOC_JET static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, -- cgit v0.12 From 7e3671911f9343a40702801fcbb3833bd98d0c46 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 6 Dec 2019 09:45:40 -0800 Subject: Get rid of old indentation style for prof --- include/jemalloc/internal/prof_externs.h | 38 ++++++++--------- src/prof.c | 72 ++++++++++++++++---------------- src/prof_data.c | 10 ++--- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 86f4193..6d29692 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -3,24 +3,24 @@ #include "jemalloc/internal/mutex.h" -extern malloc_mutex_t bt2gctx_mtx; -extern malloc_mutex_t tdatas_mtx; -extern malloc_mutex_t prof_dump_mtx; +extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; malloc_mutex_t *prof_gctx_mutex_choose(void); malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern 
size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PATH_MAX + @@ -28,21 +28,21 @@ extern char opt_prof_prefix[ 1]; /* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; +extern bool prof_active; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; +extern bool prof_gdump_val; /* Profile dump interval, measured in bytes allocated. */ -extern uint64_t prof_interval; +extern uint64_t prof_interval; /* * Initialized as opt_lg_prof_sample, and potentially modified during profiling * resets. */ -extern size_t lg_prof_sample; +extern size_t lg_prof_sample; -extern bool prof_booted; +extern bool prof_booted; /* Functions only accessed in prof_inlines_a.h */ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); diff --git a/src/prof.c b/src/prof.c index 3be461b..a9849b0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -34,44 +34,44 @@ /******************************************************************************/ /* Data. */ -bool opt_prof = false; -bool opt_prof_active = true; -bool opt_prof_thread_active_init = true; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_final = false; -bool opt_prof_leak = false; -bool opt_prof_accum = false; -char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof = false; +bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_final = false; +bool opt_prof_leak = false; +bool opt_prof_accum = false; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* Accessed via prof_idump_[accum/rollback](). */ -static prof_accum_t prof_idump_accumulated; +static prof_accum_t prof_idump_accumulated; /* * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). */ -bool prof_active; -static malloc_mutex_t prof_active_mtx; +bool prof_active; +static malloc_mutex_t prof_active_mtx; /* * Initialized as opt_prof_thread_active_init, and accessed via * prof_thread_active_init_[gs]et(). */ -static bool prof_thread_active_init; -static malloc_mutex_t prof_thread_active_init_mtx; +static bool prof_thread_active_init; +static malloc_mutex_t prof_thread_active_init_mtx; /* * Initialized as opt_prof_gdump, and accessed via * prof_gdump_[gs]et{_unlocked,}(). 
*/ -bool prof_gdump_val; -static malloc_mutex_t prof_gdump_mtx; +bool prof_gdump_val; +static malloc_mutex_t prof_gdump_mtx; -uint64_t prof_interval = 0; +uint64_t prof_interval = 0; -size_t lg_prof_sample; +size_t lg_prof_sample; /* * Table of mutexes that are shared among gctx's. These are leaf locks, so @@ -80,8 +80,8 @@ size_t lg_prof_sample; * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -static malloc_mutex_t *gctx_locks; -static atomic_u_t cum_gctxs; /* Atomic counter. */ +static malloc_mutex_t *gctx_locks; +static atomic_u_t cum_gctxs; /* Atomic counter. */ /* * Table of mutexes that are shared among tdata's. No operations require @@ -89,27 +89,27 @@ static atomic_u_t cum_gctxs; /* Atomic counter. */ * than one tdata at the same time, even though a gctx lock may be acquired * while holding a tdata lock. */ -static malloc_mutex_t *tdata_locks; +static malloc_mutex_t *tdata_locks; /* Non static to enable profiling. */ -malloc_mutex_t bt2gctx_mtx; +malloc_mutex_t bt2gctx_mtx; -malloc_mutex_t tdatas_mtx; +malloc_mutex_t tdatas_mtx; -static uint64_t next_thr_uid; -static malloc_mutex_t next_thr_uid_mtx; +static uint64_t next_thr_uid; +static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_filename_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; +static malloc_mutex_t prof_dump_filename_mtx; +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; -malloc_mutex_t prof_dump_mtx; -static char *prof_dump_prefix = NULL; +malloc_mutex_t prof_dump_mtx; +static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ -bool prof_booted = false; +bool prof_booted = false; /******************************************************************************/ @@ -550,8 +550,8 @@ prof_dump_prefix_is_empty(tsdn_t *tsdn) { return ret; } -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); diff --git a/src/prof_data.c b/src/prof_data.c index 1b32152..ecabed3 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -29,19 +29,19 @@ * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. */ -static ckh_t bt2gctx; +static ckh_t bt2gctx; /* * Tree of all extant prof_tdata_t structures, regardless of state, * {attached,detached,expired}. */ -static prof_tdata_tree_t tdatas; +static prof_tdata_tree_t tdatas; /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. */ -static char prof_dump_buf[ +static char prof_dump_buf[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PROF_DUMP_BUFSIZE @@ -49,8 +49,8 @@ static char prof_dump_buf[ 1 #endif ]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +static size_t prof_dump_buf_end; +static int prof_dump_fd; /******************************************************************************/ /* Red-black trees. 
*/ -- cgit v0.12 From 055478cca8ca8d00e74119ef6210ac64713b0ffb Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 10:03:54 -0800 Subject: Threshold is no longer updated before prof_realloc() --- include/jemalloc/internal/prof_inlines_b.h | 4 ++-- src/jemalloc.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 2aebb3d..c6f12ca 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -132,14 +132,14 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + bool prof_active, const void *old_ptr, size_t old_usize, prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (prof_active && !updated && ptr != NULL) { + if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (prof_sample_accum_update(tsd, usize, true)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 13bf8d7..e25e064 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3037,8 +3037,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(tsd_tsdn(tsd), p); } - prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, - old_usize, &old_prof_info); + prof_realloc(tsd, p, *usize, tctx, prof_active, old_ptr, old_usize, + &old_prof_info); return p; } @@ -3314,7 +3314,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); return usize; } - prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, + prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize, &old_prof_info); return usize; -- cgit v0.12 From 7d2bac5a384a2fded203298c36ce91b24cbbd497 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 10:46:31 -0800 Subject: Refactor destroy code path for prof_tctx --- include/jemalloc/internal/prof_externs.h | 4 ++-- src/prof.c | 28 ++-------------------------- src/prof_data.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6d29692..bd73a29 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -114,13 +114,13 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif -/* Functions in prof_data.c only accessed in prof.c */ +/* Functions in prof_data.c only used in profiling code. 
*/ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); +void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index a9849b0..0d6da21 100644 --- a/src/prof.c +++ b/src/prof.c @@ -128,22 +128,6 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { #endif } -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - if (opt_prof_accum) { - return false; - } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; -} - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { cassert(config_prof); @@ -171,11 +155,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { if ((uintptr_t)tctx > (uintptr_t)1U) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { - prof_tctx_destroy(tsd, tctx); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); - } + prof_tctx_try_destroy(tsd, tctx); } } @@ -216,11 +196,7 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_try_log(tsd, usize, prof_info); - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { - prof_tctx_destroy(tsd, tctx); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); - } + prof_tctx_try_destroy(tsd, tctx); } void diff --git a/src/prof_data.c b/src/prof_data.c index ecabed3..8a2cc84 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1373,7 +1373,23 @@ prof_reset(tsd_t *tsd, size_t lg_sample) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } -void +static bool +prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (opt_prof_accum) { + return false; + } + if (tctx->cnts.curobjs != 0) { + return false; + } + if (tctx->prepared) { + return false; + } + return true; +} + +static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; @@ -1449,4 +1465,15 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } } +void +prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + if (prof_tctx_should_destroy(tsd, tctx)) { + /* tctx->tdata->lock will be released in prof_tctx_destroy(). 
*/ + prof_tctx_destroy(tsd, tctx); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + } +} + /******************************************************************************/ -- cgit v0.12 From 45836d7fd3edca6e71031bce2291b48c4bb3cf76 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 17:07:41 -0800 Subject: Pass nstime_t pointer for profiling --- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent.h | 4 ++-- include/jemalloc/internal/large_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 2 +- src/large.c | 2 +- src/prof.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 930daba..fbb8fa1 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -98,7 +98,7 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { +arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 3a20540..fa7d126 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -461,8 +461,8 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { } static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { - nstime_copy(&extent->e_alloc_time, &t); +extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { + nstime_copy(&extent->e_alloc_time, t); } static inline bool diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 85786bb..a0f48b8 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -25,6 +25,6 @@ size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(extent_t *extent); -void large_prof_alloc_time_set(extent_t *extent, nstime_t time); +void large_prof_alloc_time_set(extent_t *extent, nstime_t *time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c6f12ca..657e116 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -67,7 +67,7 @@ prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { +prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { cassert(config_prof); assert(ptr != NULL); diff --git a/src/large.c b/src/large.c index 4d1257f..f10b0d1 100644 --- a/src/large.c +++ b/src/large.c @@ -383,6 +383,6 @@ large_prof_tctx_reset(extent_t *extent) { } void -large_prof_alloc_time_set(extent_t *extent, nstime_t t) { +large_prof_alloc_time_set(extent_t *extent, nstime_t *t) { extent_prof_alloc_time_set(extent, t); } diff --git a/src/prof.c b/src/prof.c index 0d6da21..4d3a800 100644 --- a/src/prof.c +++ b/src/prof.c @@ -168,7 +168,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, * when free() is called. 
*/ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsd, ptr, t); + prof_alloc_time_set(tsd, ptr, &t); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; -- cgit v0.12 From 1decf958d1dabc1d1d217889cdcea7edb2eefd3e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Nov 2019 14:10:03 -0800 Subject: Fix incorrect usage of cassert. --- src/tcache.c | 2 +- src/thread_event.c | 3 +-- test/unit/cache_bin.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 7758c4f..7922e59 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -402,7 +402,7 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { static bool tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { - cassert(sizeof(bin->cur_ptr) == sizeof(void *)); + assert(sizeof(bin->cur_ptr) == sizeof(void *)); /* * The full_position points to the lowest available space. Allocations * will access the slots toward higher addresses (for the benefit of diff --git a/src/thread_event.c b/src/thread_event.c index 9f6c927..9a1d0f9 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -189,8 +189,7 @@ thread_event_trigger(tsd_t *tsd, bool delay_event) { thread_allocated_last_event_get(tsd); /* Make sure that accumbytes cannot overflow uint64_t. */ - cassert(THREAD_EVENT_MAX_INTERVAL <= - UINT64_MAX - SC_LARGE_MAXCLASS + 1); + assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); thread_allocated_last_event_set(tsd, thread_allocated_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index f469b8d..12201a2 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -4,7 +4,7 @@ cache_bin_t test_bin; TEST_BEGIN(test_cache_bin) { cache_bin_t *bin = &test_bin; - cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); + assert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); /* Page aligned to make sure lowbits not overflowable. */ void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); -- cgit v0.12 From dd649c94859e2cdbe7b527cfb743b549c8d8bf50 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Nov 2019 16:34:48 -0800 Subject: Optimize away the tsd_fast() check on fastpath. Fold the tsd_state check onto the event threshold check. The fast threshold is set to 0 when tsd switch to non-nominal. The fast_threshold can be reset by remote threads, to refect the non nominal tsd state change. --- include/jemalloc/internal/thread_event.h | 57 +++++++++++++++++++++++- include/jemalloc/internal/tsd.h | 2 +- src/jemalloc.c | 18 +++++--- src/thread_event.c | 75 +++++++++++++++++++++++++++++--- src/tsd.c | 10 ++++- test/unit/thread_event.c | 3 -- 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 8a05eae..3ceb470 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -27,6 +27,7 @@ void thread_event_trigger(tsd_t *tsd, bool delay_event); void thread_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd); void thread_event_boot(); +void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); /* @@ -43,9 +44,7 @@ void tsd_thread_event_init(tsd_t *tsd); /* List of all thread event counters. 
*/ #define ITERATE_OVER_ALL_COUNTERS \ C(thread_allocated) \ - C(thread_allocated_next_event_fast) \ C(thread_allocated_last_event) \ - C(thread_allocated_next_event) \ ITERATE_OVER_ALL_EVENTS \ C(prof_sample_last_event) @@ -82,6 +81,60 @@ ITERATE_OVER_ALL_COUNTERS #undef E /* + * Two malloc fastpath getters -- use the unsafe getters since tsd may be + * non-nominal, in which case the fast_threshold will be set to 0. This allows + * checking for events and tsd non-nominal in a single branch. + * + * Note that these can only be used on the fastpath. + */ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_malloc_fastpath(tsd_t *tsd) { + return *tsd_thread_allocatedp_get_unsafe(tsd); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { + uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + return v; +} + +/* Below 3 for next_event_fast. */ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_fast_get(tsd_t *tsd) { + uint64_t v = tsd_thread_allocated_next_event_fast_get(tsd); + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + return v; +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_fast_set(tsd_t *tsd, uint64_t v) { + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + *tsd_thread_allocated_next_event_fastp_get(tsd) = v; +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_fast_set_non_nominal(tsd_t *tsd) { + /* + * Set the fast threshold to zero when tsd is non-nominal. Use the + * unsafe getter as this may get called during tsd init and clean up. + */ + *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd) = 0; +} + +/* For next_event. Setter also updates the fast threshold. */ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_get(tsd_t *tsd) { + return tsd_thread_allocated_next_event_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { + *tsd_thread_allocated_next_eventp_get(tsd) = v; + thread_event_recompute_fast_threshold(tsd); +} + +/* * The function checks in debug mode whether the thread event counters are in * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. 
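The folding described above can be viewed in isolation: the slow path forces the per-thread fast threshold to 0 whenever tsd leaves the nominal state, so a single unsigned comparison on the allocation fast path covers both "an event is due" and "tsd is not nominal". Below is a minimal, self-contained sketch of that idea; the toy_* names and the TOY_FAST_MAX cap are illustrative stand-ins rather than the real tsd accessors, and the actual wiring is in the src/jemalloc.c and src/thread_event.c hunks that follow.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        bool nominal;              /* stands in for tsd_state_get(tsd) == tsd_state_nominal */
        uint64_t allocated;        /* bytes allocated so far by this thread */
        uint64_t next_event;       /* real event threshold */
        uint64_t next_event_fast;  /* 0 when non-nominal or threshold too large */
    } toy_tsd_t;

    #define TOY_FAST_MAX ((uint64_t)1 << 58)   /* arbitrary cap for the fast copy */

    /* Slow-path side: recompute the fast threshold, zeroing it when non-nominal. */
    static void
    toy_recompute_fast_threshold(toy_tsd_t *tsd) {
        if (!tsd->nominal || tsd->next_event > TOY_FAST_MAX) {
            tsd->next_event_fast = 0;
        } else {
            tsd->next_event_fast = tsd->next_event;
        }
    }

    /*
     * Fast path: one branch covers both "event threshold reached" and "tsd is
     * not nominal", because a zero threshold makes the comparison always true.
     */
    static bool
    toy_malloc_fastpath(toy_tsd_t *tsd, uint64_t usize) {
        uint64_t after = tsd->allocated + usize;
        if (after >= tsd->next_event_fast) {
            return false;   /* fall back to the slow path */
        }
        tsd->allocated = after;
        return true;
    }

    int
    main(void) {
        toy_tsd_t tsd = {true, 0, 4096, 0};
        toy_recompute_fast_threshold(&tsd);
        printf("nominal, small alloc stays fast: %d\n", toy_malloc_fastpath(&tsd, 64));

        tsd.nominal = false;                  /* e.g. the thread is shutting down */
        toy_recompute_fast_threshold(&tsd);   /* forces next_event_fast to 0 */
        printf("non-nominal alloc goes slow: %d\n", !toy_malloc_fastpath(&tsd, 64));
        return 0;
    }

Zeroing the threshold instead of adding a separate state check keeps the hot branch count at one, which is the point of this commit.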
diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6332a00..961fc1f 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -110,7 +110,7 @@ typedef void (*test_callback_t)(int *); /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index e25e064..af72d41 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2354,7 +2354,7 @@ je_malloc(size_t size) { } tsd_t *tsd = tsd_get(false); - if (unlikely((size > SC_LOOKUP_MAXCLASS) || !tsd || !tsd_fast(tsd))) { + if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { return malloc_default(size); } @@ -2373,13 +2373,17 @@ je_malloc(size_t size) { assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); - uint64_t thread_allocated_after = thread_allocated_get(tsd) + usize; - assert(thread_allocated_next_event_fast_get(tsd) <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - if (unlikely(thread_allocated_after >= - thread_allocated_next_event_fast_get(tsd))) { + uint64_t allocated = thread_allocated_malloc_fastpath(tsd); + uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. + */ + uint64_t allocated_after = allocated + usize; + if (unlikely(allocated_after >= threshold)) { return malloc_default(size); } + assert(tsd_fast(tsd)); tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); @@ -2387,7 +2391,7 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { - thread_allocated_set(tsd, thread_allocated_after); + thread_allocated_set(tsd, allocated_after); if (config_stats) { bin->tstats.nrequests++; } diff --git a/src/thread_event.c b/src/thread_event.c index 9a1d0f9..0657c84 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -103,10 +103,11 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); assert(last_event != next_event); - if (next_event <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) { - assert(next_event_fast == next_event); - } else { + if (next_event > THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX || + !tsd_fast(tsd)) { assert(next_event_fast == 0U); + } else { + assert(next_event_fast == next_event); } /* The subtraction is intentionally susceptible to underflow. */ @@ -128,15 +129,77 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } +/* + * Synchronization around the fast threshold in tsd -- + * There are two threads to consider in the synchronization here: + * - The owner of the tsd being updated by a slow path change + * - The remote thread, doing that slow path change. + * + * As a design constraint, we want to ensure that a slow-path transition cannot + * be ignored for arbitrarily long, and that if the remote thread causes a + * slow-path transition and then communicates with the owner thread that it has + * occurred, then the owner will go down the slow path on the next allocator + * operation (so that we don't want to just wait until the owner hits its slow + * path reset condition on its own). 
+ * + * Here's our strategy to do that: + * + * The remote thread will update the slow-path stores to TSD variables, issue a + * SEQ_CST fence, and then update the TSD next_event_fast counter. The owner + * thread will update next_event_fast, issue an SEQ_CST fence, and then check + * its TSD to see if it's on the slow path. + + * This is fairly straightforward when 64-bit atomics are supported. Assume that + * the remote fence is sandwiched between two owner fences in the reset pathway. + * The case where there is no preceding or trailing owner fence (i.e. because + * the owner thread is near the beginning or end of its life) can be analyzed + * similarly. The owner store to next_event_fast preceding the earlier owner + * fence will be earlier in coherence order than the remote store to it, so that + * the owner thread will go down the slow path once the store becomes visible to + * it, which is no later than the time of the second fence. + + * The case where we don't support 64-bit atomics is trickier, since word + * tearing is possible. We'll repeat the same analysis, and look at the two + * owner fences sandwiching the remote fence. The next_event_fast stores done + * alongside the earlier owner fence cannot overwrite any of the remote stores + * (since they precede the earlier owner fence in sb, which precedes the remote + * fence in sc, which precedes the remote stores in sb). After the second owner + * fence there will be a re-check of the slow-path variables anyways, so the + * "owner will notice that it's on the slow path eventually" guarantee is + * satisfied. To make sure that the out-of-band-messaging constraint is as well, + * note that either the message passing is sequenced before the second owner + * fence (in which case the remote stores happen before the second set of owner + * stores, so malloc sees a value of zero for next_event_fast and goes down the + * slow path), or it is not (in which case the owner sees the tsd slow-path + * writes on its previous update). This leaves open the possibility that the + * remote thread will (at some arbitrary point in the future) zero out one half + * of the owner thread's next_event_fast, but that's always safe (it just sends + * it down the slow path earlier). + */ +void +thread_event_recompute_fast_threshold(tsd_t *tsd) { + if (tsd_state_get(tsd) != tsd_state_nominal) { + /* Check first because this is also called on purgatory. */ + thread_allocated_next_event_fast_set_non_nominal(tsd); + return; + } + uint64_t next_event = thread_allocated_next_event_get(tsd); + uint64_t next_event_fast = (next_event <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + thread_allocated_next_event_fast_set(tsd, next_event_fast); + + atomic_fence(ATOMIC_SEQ_CST); + if (tsd_state_get(tsd) != tsd_state_nominal) { + thread_allocated_next_event_fast_set_non_nominal(tsd); + } +} + static void thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { assert(wait <= THREAD_EVENT_MAX_START_WAIT); uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); thread_allocated_next_event_set(tsd, next_event); - uint64_t next_event_fast = (next_event <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? 
next_event : 0U; - thread_allocated_next_event_fast_set(tsd, next_event_fast); } static uint64_t diff --git a/src/tsd.c b/src/tsd.c index 6e0ee93..17e9eed 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -115,8 +115,11 @@ tsd_force_recompute(tsdn_t *tsdn) { ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); - tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, - ATOMIC_RELAXED); + tsd_atomic_store(&remote_tsd->state, + tsd_state_nominal_recompute, ATOMIC_RELAXED); + /* See comments in thread_event_recompute_fast_threshold(). */ + atomic_fence(ATOMIC_SEQ_CST); + thread_allocated_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } @@ -175,6 +178,8 @@ tsd_slow_update(tsd_t *tsd) { old_state = tsd_atomic_exchange(&tsd->state, new_state, ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); + + thread_event_recompute_fast_threshold(tsd); } void @@ -213,6 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { tsd_slow_update(tsd); } } + thread_event_recompute_fast_threshold(tsd); } static bool diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index cf5b2e5..f016cc5 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -7,8 +7,6 @@ TEST_BEGIN(test_next_event_fast_roll_back) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - thread_allocated_next_event_fast_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); #define E(event, condition) \ event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); @@ -27,7 +25,6 @@ TEST_BEGIN(test_next_event_fast_resume) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); - thread_allocated_next_event_fast_set(tsd, 0); #define E(event, condition) \ event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); -- cgit v0.12 From 1d01e4c770c3229041f1010037da2533568fef05 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 16 Dec 2019 12:41:06 -0800 Subject: Initialization utilities for nstime --- include/jemalloc/internal/nstime.h | 8 ++++++++ src/arena.c | 9 +++------ src/background_thread.c | 7 +++---- src/ctl.c | 2 +- src/mutex.c | 8 ++++---- src/nstime.c | 6 ++++++ src/prof.c | 4 ++-- src/prof_log.c | 12 +++++++----- test/src/timer.c | 3 +-- test/unit/background_thread.c | 9 ++++----- test/unit/decay.c | 9 +++------ test/unit/nstime.c | 10 ++++------ 12 files changed, 46 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 17c177c..a3766ff 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -31,4 +31,12 @@ extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; +bool nstime_init_update(nstime_t *time); + +JEMALLOC_ALWAYS_INLINE void +nstime_init_zero(nstime_t *time) { + static const nstime_t zero = NSTIME_ZERO_INITIALIZER; + nstime_copy(time, &zero); +} + #endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/src/arena.c b/src/arena.c index 5537e66..05c4021 100644 --- a/src/arena.c +++ b/src/arena.c @@ -667,8 +667,7 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&decay->epoch, 0); - 
nstime_update(&decay->epoch); + nstime_init_update(&decay->epoch); decay->jitter_state = (uint64_t)(uintptr_t)decay; arena_decay_deadline_init(decay); decay->nunpurged = 0; @@ -726,8 +725,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } nstime_t time; - nstime_init(&time, 0); - nstime_update(&time); + nstime_init_update(&time); if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) > 0)) { /* @@ -2066,8 +2064,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Set arena before creating background threads. */ arena_set(ind, arena); - nstime_init(&arena->create_time, 0); - nstime_update(&arena->create_time); + nstime_init_update(&arena->create_time); /* We don't support reentrancy for arena 0 bootstrapping. */ if (ind != 0) { diff --git a/src/background_thread.c b/src/background_thread.c index 4a74edb..400dae5 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -74,7 +74,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { info->npages_to_purge_new = 0; if (config_stats) { info->tot_n_runs = 0; - nstime_init(&info->tot_sleep_time, 0); + nstime_init_zero(&info->tot_sleep_time); } } @@ -236,8 +236,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); /* We need malloc clock (can be different from tv). */ nstime_t next_wakeup; - nstime_init(&next_wakeup, 0); - nstime_update(&next_wakeup); + nstime_init_update(&next_wakeup); nstime_iadd(&next_wakeup, interval); assert(nstime_ns(&next_wakeup) < BACKGROUND_THREAD_INDEFINITE_SLEEP); @@ -794,7 +793,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return true; } - nstime_init(&stats->run_interval, 0); + nstime_init_zero(&stats->run_interval); memset(&stats->max_counter_per_bg_thd, 0, sizeof(mutex_prof_data_t)); uint64_t num_runs = 0; diff --git a/src/ctl.c b/src/ctl.c index e2cdc29..24f530f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1037,7 +1037,7 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { if (!have_background_thread || background_thread_stats_read(tsdn, stats)) { memset(stats, 0, sizeof(background_thread_stats_t)); - nstime_init(&stats->run_interval, 0); + nstime_init_zero(&stats->run_interval); } malloc_mutex_prof_copy( &ctl_stats->mutex_prof_data[global_prof_mutex_max_per_bg_thd], diff --git a/src/mutex.c b/src/mutex.c index 3f920f5..bffcfb5 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -46,7 +46,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - nstime_t before = NSTIME_ZERO_INITIALIZER; + nstime_t before; if (ncpus == 1) { goto label_spin_done; @@ -68,7 +68,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { return; } label_spin_done: - nstime_update(&before); + nstime_init_update(&before); /* Copy before to after to avoid clock skews. 
*/ nstime_t after; nstime_copy(&after, &before); @@ -104,8 +104,8 @@ label_spin_done: static void mutex_prof_data_init(mutex_prof_data_t *data) { memset(data, 0, sizeof(mutex_prof_data_t)); - nstime_init(&data->max_wait_time, 0); - nstime_init(&data->tot_wait_time, 0); + nstime_init_zero(&data->max_wait_time); + nstime_init_zero(&data->tot_wait_time); data->prev_owner = NULL; } diff --git a/src/nstime.c b/src/nstime.c index 71db353..eb8f6c0 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -168,3 +168,9 @@ nstime_update_impl(nstime_t *time) { return false; } nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl; + +bool +nstime_init_update(nstime_t *time) { + nstime_init_zero(time); + return nstime_update(time); +} diff --git a/src/prof.c b/src/prof.c index 4d3a800..d1d46e2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -166,8 +166,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, /* Get the current time and set this in the extent_t. We'll read this * when free() is called. */ - nstime_t t = NSTIME_ZERO_INITIALIZER; - nstime_update(&t); + nstime_t t; + nstime_init_update(&t); prof_alloc_time_set(tsd, ptr, &t); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/prof_log.c b/src/prof_log.c index b587934..2904f0c 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -38,7 +38,7 @@ static char log_filename[ 1]; /* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; +static nstime_t log_start_timestamp; /* Increment these when adding to the log_bt and log_thr linked lists. */ static size_t log_bt_index = 0; @@ -231,8 +231,8 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { } nstime_t alloc_time = prof_info->alloc_time; - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); + nstime_t free_time; + nstime_init_update(&free_time); size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) @@ -556,9 +556,9 @@ static void prof_log_emit_metadata(emitter_t *emitter) { emitter_json_object_kv_begin(emitter, "info"); - nstime_t now = NSTIME_ZERO_INITIALIZER; + nstime_t now; - nstime_update(&now); + nstime_init_update(&now); uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); @@ -702,6 +702,8 @@ bool prof_log_init(tsd_t *tsd) { return true; } + nstime_init_zero(&log_start_timestamp); + log_tables_initialized = true; return false; } diff --git a/test/src/timer.c b/test/src/timer.c index c451c63..6e8b8ed 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -2,8 +2,7 @@ void timer_start(timedelta_t *timer) { - nstime_init(&timer->t0, 0); - nstime_update(&timer->t0); + nstime_init_update(&timer->t0); } void diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index f7bd37c..f597285 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -83,9 +83,8 @@ TEST_BEGIN(test_background_thread_running) { assert_b_eq(info->state, background_thread_started, "Background_thread did not start.\n"); - nstime_t start, now; - nstime_init(&start, 0); - nstime_update(&start); + nstime_t start; + nstime_init_update(&start); bool ran = false; while (true) { @@ -98,8 +97,8 @@ TEST_BEGIN(test_background_thread_running) { break; } - nstime_init(&now, 0); - nstime_update(&now); + nstime_t now; + nstime_init_update(&now); nstime_subtract(&now, &start); assert_u64_lt(nstime_sec(&now), 1000, "Background threads did not run for 1000 
seconds."); diff --git a/test/unit/decay.c b/test/unit/decay.c index cf3c079..59936db 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -384,8 +384,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, #define NINTERVALS 101 nstime_t time, update_interval, decay_ms, deadline; - nstime_init(&time, 0); - nstime_update(&time); + nstime_init_update(&time); nstime_init2(&decay_ms, dt, 0); nstime_copy(&deadline, &time); @@ -456,8 +455,7 @@ TEST_BEGIN(test_decay_ticker) { } nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); + nstime_init_update(&time_mock); monotonic_mock = true; nstime_monotonic_orig = nstime_monotonic; @@ -507,8 +505,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { npurge0 = get_arena_npurge(0); nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); + nstime_init_update(&time_mock); monotonic_mock = false; nstime_monotonic_orig = nstime_monotonic; diff --git a/test/unit/nstime.c b/test/unit/nstime.c index f313780..5a736bb 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -25,7 +25,7 @@ TEST_BEGIN(test_nstime_copy) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); nstime_copy(&nstb, &nsta); assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); @@ -108,7 +108,7 @@ TEST_BEGIN(test_nstime_subtract) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); @@ -126,7 +126,7 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 42*BILLION + 43); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); @@ -209,9 +209,7 @@ TEST_END TEST_BEGIN(test_nstime_update) { nstime_t nst; - nstime_init(&nst, 0); - - assert_false(nstime_update(&nst), "Basic time update failed."); + assert_false(nstime_init_update(&nst), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. */ { -- cgit v0.12 From 4afd709d1f3ae7a727f144a96d8b834157d31e17 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Dec 2019 16:48:03 -0800 Subject: Restructure setters for profiling info Explicitly define three setters: - `prof_tctx_reset()`: set `prof_tctx` to `1U`, if we don't know in advance whether the allocation is large or not; - `prof_tctx_reset_sampled()`: set `prof_tctx` to `1U`, if we already know in advance that the allocation is large; - `prof_info_set()`: set a real `prof_tctx`, and also set other profiling info e.g. the allocation time. Code structure wise, the prof level is kept as a thin wrapper, the large level only provides low level setter APIs, and the arena level carries out the main logic. 
--- include/jemalloc/internal/arena_inlines_b.h | 15 +++++++-------- include/jemalloc/internal/large_externs.h | 3 +-- include/jemalloc/internal/prof_inlines_b.h | 29 ++++++++++++++--------------- src/large.c | 9 ++++++--- src/prof.c | 8 +------- 5 files changed, 29 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fbb8fa1..23b3455 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,8 +68,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); @@ -77,17 +76,17 @@ arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (alloc_ctx == NULL) { extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_set(extent, tctx); + large_prof_tctx_reset(extent); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_set(iealloc(tsd_tsdn(tsd), ptr), tctx); + large_prof_tctx_reset(iealloc(tsd_tsdn(tsd), ptr)); } } } -static inline void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -98,13 +97,13 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { +arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); - large_prof_alloc_time_set(extent, t); + large_prof_info_set(extent, tctx); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index a0f48b8..2299920 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -23,8 +23,7 @@ void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); -void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(extent_t *extent); -void large_prof_alloc_time_set(extent_t *extent, nstime_t *time); +void large_prof_info_set(extent_t *extent, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 657e116..193ede7 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -50,28 +50,28 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, alloc_ctx, tctx); + arena_prof_tctx_reset(tsd, ptr, alloc_ctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { 
+prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsd, ptr, tctx); + arena_prof_tctx_reset_sampled(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { +prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); + assert((uintptr_t)tctx > (uintptr_t)1U); - arena_prof_alloc_time_set(tsd, ptr, t); + arena_prof_info_set(tsd, ptr, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -125,8 +125,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsd, ptr, alloc_ctx, - (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(tsd, ptr, alloc_ctx); } } @@ -161,15 +160,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd, ptr, NULL, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(tsd, ptr, NULL); } else if (unlikely(old_sampled)) { /* - * prof_tctx_set() would work for the !moved case as well, but - * prof_tctx_reset() is slightly cheaper, and the proper thing - * to do here in the presence of explicit knowledge re: moved - * state. + * prof_tctx_reset() would work for the !moved case as well, + * but prof_tctx_reset_sampled() is slightly cheaper, and the + * proper thing to do here in the presence of explicit + * knowledge re: moved state. */ - prof_tctx_reset(tsd, ptr, tctx); + prof_tctx_reset_sampled(tsd, ptr); } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); diff --git a/src/large.c b/src/large.c index f10b0d1..1a1e82b 100644 --- a/src/large.c +++ b/src/large.c @@ -372,7 +372,7 @@ large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { extent_prof_info_get(extent, prof_info); } -void +static void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { extent_prof_tctx_set(extent, tctx); } @@ -383,6 +383,9 @@ large_prof_tctx_reset(extent_t *extent) { } void -large_prof_alloc_time_set(extent_t *extent, nstime_t *t) { - extent_prof_alloc_time_set(extent, t); +large_prof_info_set(extent_t *extent, prof_tctx_t *tctx) { + large_prof_tctx_set(extent, tctx); + nstime_t t; + nstime_init_update(&t); + extent_prof_alloc_time_set(extent, &t); } diff --git a/src/prof.c b/src/prof.c index d1d46e2..3a72e9c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -162,13 +162,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, NULL, tctx); - - /* Get the current time and set this in the extent_t. We'll read this - * when free() is called. */ - nstime_t t; - nstime_init_update(&t); - prof_alloc_time_set(tsd, ptr, &t); + prof_info_set(tsd, ptr, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; -- cgit v0.12 From d5031ea82441301693a30cad50e0d32d45997bc3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 Dec 2019 11:57:08 -0800 Subject: Allow dallocx and sdallocx after tsd destruction. After a thread turns into purgatory / reincarnated state, still allow dallocx and sdallocx to function normally. 
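One scenario this enables, shown as a hedged illustration rather than a test taken from the patch: a pthread key destructor that frees memory through dallocx() after jemalloc may already have torn down the exiting thread's own tsd.

#include <pthread.h>
#include <stddef.h>
#include <jemalloc/jemalloc.h>

static pthread_key_t key;

static void
destructor(void *ptr) {
	/*
	 * Depending on destructor ordering, the exiting thread's jemalloc tsd
	 * may already be in the reincarnated / minimal-initialized state here;
	 * with this change dallocx() is still expected to work.
	 */
	dallocx(ptr, 0);
}

static void *
worker(void *arg) {
	(void)arg;
	pthread_setspecific(key, mallocx(64, 0));
	return NULL;
}

int
main(void) {
	pthread_t thd;
	pthread_key_create(&key, destructor);
	pthread_create(&thd, NULL, worker, NULL);
	pthread_join(thd, NULL);
	return 0;
}

mallocx() and dallocx() are the documented non-standard entry points; whether the destructor actually observes a non-nominal tsd depends on destructor ordering at thread exit.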
--- include/jemalloc/internal/tsd.h | 6 ++++++ src/jemalloc.c | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 961fc1f..b7ce7ca 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -440,4 +440,10 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { return tsd_rtree_ctx(tsdn_tsd(tsdn)); } +static inline bool +tsd_state_nocleanup(tsd_t *tsd) { + return tsd_state_get(tsd) == tsd_state_reincarnated || + tsd_state_get(tsd) == tsd_state_minimal_initialized; +} + #endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index af72d41..4fc1a5e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3428,14 +3428,15 @@ je_dallocx(void *ptr, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. */ - assert(tsd_reentrancy_level_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { @@ -3487,7 +3488,7 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3496,7 +3497,8 @@ sdallocx_default(void *ptr, size_t size, int flags) { tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. */ - assert(tsd_reentrancy_level_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { -- cgit v0.12 From 9226e1f0d8ad691ef140bc0bf9340efadb96e5fe Mon Sep 17 00:00:00 2001 From: Wenbo Zhang Date: Sun, 15 Dec 2019 07:26:45 -0500 Subject: fix opt.thp:never still use THP with base_new --- src/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/base.c b/src/base.c index f3c6166..9a55ed2 100644 --- a/src/base.c +++ b/src/base.c @@ -39,6 +39,9 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); + if (have_madvise_huge && addr) { + pages_set_thp_state(addr, size); + } } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); -- cgit v0.12 From 4278f846038b2299938be8479c8ccd3617eed217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 12:59:34 -0800 Subject: Move extent hook getters/setters to arena.c This is where they're logically scoped; they access arena data. 
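Since the supported external path to these is the arena.<i>.extent_hooks mallctl (rewired in the ctl.c hunk below), a minimal usage sketch follows, assuming a standard build where the public extent_hooks_t type is available and with arena index 0 standing in for whichever arena is of interest:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Read arena 0's current extent hooks via the ctl path. */
	extent_hooks_t *hooks;
	size_t sz = sizeof(hooks);
	if (mallctl("arena.0.extent_hooks", (void *)&hooks, &sz, NULL, 0) != 0) {
		fprintf(stderr, "mallctl(\"arena.0.extent_hooks\") failed\n");
		return 1;
	}
	printf("arena 0 extent hooks: %p\n", (void *)hooks);
	return 0;
}

Writing a new extent_hooks_t pointer through the same mallctl ends up in arena_set_extent_hooks(), which takes the background thread mutex exactly as the old extent_hooks_set() did.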
--- include/jemalloc/internal/arena_externs.h | 3 +++ include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent_externs.h | 4 ---- src/arena.c | 26 ++++++++++++++++++++++++-- src/ctl.c | 7 ++++--- src/extent.c | 22 +--------------------- src/large.c | 4 ++-- 7 files changed, 35 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 5178e23..93a6302 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -72,6 +72,9 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); +extent_hooks_t *arena_get_extent_hooks(arena_t *arena); +extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 23b3455..8f2d396 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -16,7 +16,7 @@ arena_get_from_extent(extent_t *extent) { JEMALLOC_ALWAYS_INLINE bool arena_has_default_hooks(arena_t *arena) { - return (extent_hooks_get(arena) == &extent_hooks_default); + return (arena_get_extent_hooks(arena) == &extent_hooks_default); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 6963b47..edf3c65 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -15,10 +15,6 @@ extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks); - ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) diff --git a/src/arena.c b/src/arena.c index 05c4021..043f806 100644 --- a/src/arena.c +++ b/src/arena.c @@ -923,7 +923,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_list_t decay_extents; extent_list_init(&decay_extents); @@ -1159,7 +1159,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * destroyed, or provide custom extent hooks that track retained * dss-based extents for later reuse. 
*/ - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, &arena->eset_retained, 0)) != NULL) { @@ -1846,6 +1846,28 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return ret; } +extent_hooks_t * +arena_get_extent_hooks(arena_t *arena) { + return base_extent_hooks_get(arena->base); +} + +extent_hooks_t * +arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks) { + background_thread_info_t *info; + if (have_background_thread) { + info = arena_background_thread_info_get(arena); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + } + extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); + if (have_background_thread) { + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + } + + return ret; +} + + dss_prec_t arena_dss_prec_get(arena_t *arena) { return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); diff --git a/src/ctl.c b/src/ctl.c index 24f530f..c2f1270 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2396,11 +2396,12 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + old_extent_hooks = arena_set_extent_hooks(tsd, + arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = extent_hooks_get(arena); + old_extent_hooks = arena_get_extent_hooks( + arena); READ(old_extent_hooks, extent_hooks_t *); } } diff --git a/src/extent.c b/src/extent.c index d9eff76..60830a6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -221,31 +221,11 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } -extent_hooks_t * -extent_hooks_get(arena_t *arena) { - return base_extent_hooks_get(arena->base); -} - -extent_hooks_t * -extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) { - background_thread_info_t *info; - if (have_background_thread) { - info = arena_background_thread_info_get(arena); - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - } - extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); - if (have_background_thread) { - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - } - - return ret; -} - static void extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) { if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) { - *r_extent_hooks = extent_hooks_get(arena); + *r_extent_hooks = arena_get_extent_hooks(arena); } } diff --git a/src/large.c b/src/large.c index 1a1e82b..fb216ed 100644 --- a/src/large.c +++ b/src/large.c @@ -93,7 +93,7 @@ static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); assert(oldusize > usize); @@ -129,7 +129,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); 
size_t trailsize = usize - oldusize; if (extent_hooks->merge == NULL) { -- cgit v0.12 From 9f6eb09585239c10bde86d68ed48f6fe113ef8f7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 14:03:22 -0800 Subject: Extents: Eagerly initialize extent hooks. When deferred initialization was added, initializing required copying sizeof(extent_hooks_t) bytes after a pointer chase. Today, it's just a single pointer loaded from the base_t. In subsequent diffs, we'll get rid of even that. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 5 +- include/jemalloc/internal/extent_externs.h | 24 +-- include/jemalloc/internal/extent_types.h | 2 - src/arena.c | 48 ++--- src/extent.c | 271 +++++++++++++--------------- src/extent_dss.c | 4 +- src/large.c | 18 +- 8 files changed, 173 insertions(+), 201 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 93a6302..c13d828 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8f2d396..9ccfaa9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -134,11 +134,10 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. */ JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks, +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, - r_extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); if (config_stats) { /* Update stats accordingly. 
*/ arena_stats_lock(tsdn, &arena->stats); diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index edf3c65..218ca94 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -19,38 +19,38 @@ ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, + extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); + extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); + extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 02d7b2c..25b360e 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -4,8 +4,6 @@ typedef struct extent_util_stats_s extent_util_stats_t; typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; -#define EXTENT_HOOKS_INITIALIZER NULL - /* * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) * is the max ratio between the size of 
the active extent and the new extent. diff --git a/src/arena.c b/src/arena.c index 043f806..a272438 100644 --- a/src/arena.c +++ b/src/arena.c @@ -253,11 +253,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, r_extent_hooks, &arena->eset_dirty, + extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); @@ -426,7 +426,7 @@ arena_may_have_muzzy(arena_t *arena) { extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -434,17 +434,17 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, + extent_t *extent = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, &extent_hooks, + extent = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, + extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { @@ -819,7 +819,7 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_limit, + extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_limit, size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -828,7 +828,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, r_extent_hooks, eset, + (extent = extents_evict(tsdn, arena, extent_hooks, eset, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -838,7 +838,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, arena_decay_t *decay, eset_t *eset, + extent_hooks_t *extent_hooks, arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -864,9 +864,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - r_extent_hooks, extent, 0, + extent_hooks, extent, 0, extent_size_get(extent))) { - extents_dalloc(tsdn, arena, r_extent_hooks, + extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, extent); 
arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -874,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); if (config_stats) { nunmapped += npages; @@ -928,11 +928,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_t decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, extent_hooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, eset, all, &decay_extents, + extent_hooks, decay, eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -1006,8 +1006,8 @@ static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, slab); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, slab); } static void @@ -1161,9 +1161,9 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_t *extent; - while ((extent = extents_evict(tsdn, arena, &extent_hooks, + while ((extent = extents_evict(tsdn, arena, extent_hooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); + extent_destroy_wrapper(tsdn, arena, extent_hooks, extent); } } @@ -1205,7 +1205,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, + extent_hooks_t *extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1215,7 +1215,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; - slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,20 +1232,20 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, + extent_t *slab = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, &extent_hooks, + slab = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, + slab = arena_slab_alloc_hard(tsdn, arena, extent_hooks, bin_info, szind); if (slab == NULL) { return NULL; diff --git a/src/extent.c b/src/extent.c index 60830a6..d21a1e8 100644 --- 
a/src/extent.c +++ b/src/extent.c @@ -29,7 +29,7 @@ static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); @@ -38,27 +38,27 @@ static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, bool growing_retained); const extent_hooks_t extent_hooks_default = { @@ -98,14 +98,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, + extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent, + extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ @@ -221,23 +221,15 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } -static void -extent_hooks_assure_initialized(arena_t *arena, - extent_hooks_t **r_extent_hooks) 
{ - if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) { - *r_extent_hooks = arena_get_extent_hooks(arena); - } -} - ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, + extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, &coalesced, false); extent_state_set(extent, eset_state_get(eset)); @@ -249,7 +241,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, } extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -257,14 +249,14 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, eset, + extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -275,11 +267,11 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, eset, extent, false); + extent_record(tsdn, arena, extent_hooks, eset, extent, false); } extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -309,7 +301,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks, + if (extent_try_delayed_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent)) { break; } @@ -347,7 +339,7 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { @@ -358,9 +350,9 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * that this is only a virtual memory leak. 
*/ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, + if (extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, r_extent_hooks, + extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), growing_retained); } @@ -590,7 +582,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -615,7 +607,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, size_t esize = size + pad; malloc_mutex_lock(tsdn, &eset->mtx); - extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, @@ -678,7 +669,7 @@ typedef enum { static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ extent_t **extent, extent_t **lead, extent_t **trail, /* The mess to clean up, in case of error. */ @@ -702,7 +693,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the lead. */ if (leadsize != 0) { *lead = *extent; - *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *extent = extent_split_impl(tsdn, arena, extent_hooks, *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); if (*extent == NULL) { @@ -714,7 +705,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + *trail = extent_split_impl(tsdn, arena, extent_hooks, *extent, esize, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { @@ -755,7 +746,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, bool growing_retained) { extent_t *lead; @@ -764,7 +755,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); @@ -799,7 +790,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, r_extent_hooks, eset, + extents_abandon_vm(tsdn, arena, extent_hooks, eset, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -825,7 +816,7 @@ extent_need_manual_zero(arena_t *arena) { * in the given eset_t. 
*/ static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { @@ -838,14 +829,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_recycle_extract(tsdn, arena, extent_hooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, + extent = extent_recycle_split(tsdn, arena, extent_hooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { @@ -853,9 +844,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, + if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, r_extent_hooks, eset, + extent_record(tsdn, arena, extent_hooks, eset, extent, growing_retained); return NULL; } @@ -995,7 +986,7 @@ extent_hook_post_reentrancy(tsdn_t *tsdn) { */ static extent_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, size_t size, size_t pad, size_t alignment, + extent_hooks_t *extent_hooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); assert(pad == 0 || !slab); @@ -1031,12 +1022,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, bool committed = false; void *ptr; - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { ptr = extent_alloc_default_impl(tsdn, arena, NULL, alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); - ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, + ptr = extent_hooks->alloc(extent_hooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); @@ -1070,17 +1061,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_t *to_leak; extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, trail, true); } } else { @@ -1093,21 +1084,21 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, 
r_extent_hooks, + extents_abandon_vm(tsdn, arena, extent_hooks, &arena->eset_retained, to_leak, true); } goto label_err; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, + if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), true)) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, extent, true); goto label_err; } @@ -1161,14 +1152,14 @@ label_err: static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { @@ -1177,7 +1168,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, r_extent_hooks, size, + extent = extent_grow_retained(tsdn, arena, extent_hooks, size, pad, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { @@ -1190,7 +1181,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; extent_t *extent = extent_alloc(tsdn, arena); @@ -1199,13 +1190,13 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); - addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, + addr = extent_hooks->alloc(extent_hooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1229,14 +1220,12 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_alloc_retained(tsdn, arena, extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { if (opt_retain && new_addr != NULL) { @@ -1248,7 +1237,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, + extent = extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } @@ -1277,7 +1266,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); @@ -1285,7 +1274,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_activate_locked(tsdn, arena, eset, outer); malloc_mutex_unlock(tsdn, &eset->mtx); - bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, + bool err = extent_merge_impl(tsdn, arena, extent_hooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &eset->mtx); @@ -1298,7 +1287,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -1329,7 +1318,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, eset, extent, next, true, + extent_hooks, eset, extent, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -1349,7 +1338,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, eset, extent, prev, false, + extent_hooks, eset, extent, prev, false, growing_retained)) { extent = prev; if (eset->delay_coalesce) { @@ -1370,17 +1359,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, coalesced, growing_retained, false); } static extent_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, coalesced, growing_retained, true); } @@ -1389,7 +1378,7 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, * given eset_t (coalesces, deregisters slab interiors, the heap operations). */ static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1399,7 +1388,6 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &eset->mtx); - extent_hooks_assure_initialized(arena, r_extent_hooks); extent_szind_set(extent, SC_NSIZES); if (extent_slab_get(extent)) { @@ -1411,7 +1399,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, (uintptr_t)extent_base_get(extent), true) == extent); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, + extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); @@ -1420,13 +1408,13 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, do { assert(extent_state_get(extent) == extent_state_active); extent = extent_try_coalesce_large(tsdn, arena, - r_extent_hooks, rtree_ctx, eset, extent, + extent_hooks, rtree_ctx, eset, extent, &coalesced, growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, r_extent_hooks, extent); + arena_decay_extent(tsdn, arena, extent_hooks, extent); return; } } @@ -1437,7 +1425,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1446,7 +1434,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc(tsdn, arena, extent); return; } - extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); } static bool @@ -1471,7 +1459,7 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { bool err; assert(extent_base_get(extent) != NULL); @@ -1481,16 +1469,15 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); - extent_hooks_assure_initialized(arena, r_extent_hooks); /* Try to deallocate. */ - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = ((*r_extent_hooks)->dalloc == NULL || - (*r_extent_hooks)->dalloc(*r_extent_hooks, + err = (extent_hooks->dalloc == NULL || + extent_hooks->dalloc(extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena))); extent_hook_post_reentrancy(tsdn); @@ -1505,50 +1492,50 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (*r_extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + if (extent_hooks != &extent_hooks_default || extent_may_dalloc()) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, + if (!extent_dalloc_wrapper_try(tsdn, arena, extent_hooks, extent)) { return; } extent_reregister(tsdn, extent); } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; if (!extent_committed_get(extent)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, + } else if (!extent_decommit_wrapper(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent))) { zeroed = true; - } else if ((*r_extent_hooks)->purge_forced != NULL && - !(*r_extent_hooks)->purge_forced(*r_extent_hooks, + } else if (extent_hooks->purge_forced != NULL && + !extent_hooks->purge_forced(extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - ((*r_extent_hooks)->purge_lazy != NULL && - !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, + (extent_hooks->purge_lazy != NULL && + !extent_hooks->purge_lazy(extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena)))) { zeroed = false; } else { zeroed = false; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_zeroed_set(extent, zeroed); @@ -1557,7 +1544,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, r_extent_hooks, &arena->eset_retained, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, extent, false); } @@ -1576,7 +1563,7 @@ extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1587,15 +1574,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); - extent_hooks_assure_initialized(arena, r_extent_hooks); /* Try to destroy; silently fail otherwise. */ - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); - } else if ((*r_extent_hooks)->destroy != NULL) { + } else if (extent_hooks->destroy != NULL) { extent_hook_pre_reentrancy(tsdn, arena); - (*r_extent_hooks)->destroy(*r_extent_hooks, + extent_hooks->destroy(extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); @@ -1613,19 +1599,18 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = ((*r_extent_hooks)->commit == NULL || - (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), + bool err = (extent_hooks->commit == NULL || + extent_hooks->commit(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) || !err); @@ -1634,9 +1619,9 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, r_extent_hooks, extent, offset, + return extent_commit_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1649,21 +1634,19 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = ((*r_extent_hooks)->decommit == NULL || - (*r_extent_hooks)->decommit(*r_extent_hooks, + bool err = (extent_hooks->decommit == NULL || + extent_hooks->decommit(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) && err); @@ -1686,23 +1669,21 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->purge_lazy == NULL) { + if (extent_hooks->purge_lazy == NULL) { return true; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks, + bool err = extent_hooks->purge_lazy(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } @@ -1711,9 +1692,9 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, + return extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1733,23 +1714,21 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->purge_forced == NULL) { + if (extent_hooks->purge_forced == NULL) { return true; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks, + bool err = extent_hooks->purge_forced(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } return err; @@ -1757,9 +1736,9 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent, + return extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1787,16 +1766,14 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->split == NULL) { + if (extent_hooks->split == NULL) { return NULL; } @@ -1838,13 +1815,13 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), + bool err = extent_hooks->split(extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } if (err) { @@ -1872,9 +1849,9 @@ label_error_a: extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, r_extent_hooks, extent, size_a, + return extent_split_impl(tsdn, arena, extent_hooks, extent, size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } @@ -1938,26 +1915,24 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(extent_base_get(a) < extent_base_get(b)); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->merge == NULL || extent_head_no_merge(a, b)) { + if (extent_hooks->merge == NULL || extent_head_no_merge(a, b)) { return true; } bool err; - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = (*r_extent_hooks)->merge(*r_extent_hooks, + err = extent_hooks->merge(extent_hooks, extent_base_get(a), extent_size_get(a), extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena_ind_get(arena)); @@ -2017,8 +1992,8 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, r_extent_hooks, a, b, false); + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, extent_hooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index eb07480..dd80a19 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -195,7 +195,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } if (*zero && *commit) { extent_hooks_t *extent_hooks = - EXTENT_HOOKS_INITIALIZER; + arena_get_extent_hooks(arena); extent_t extent; extent_init(&extent, @@ -204,7 +204,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, &extent_hooks, &extent, 0, + arena, extent_hooks, &extent, 0, size)) { memset(ret, 0, size); } diff --git a/src/large.c b/src/large.c index fb216ed..6de1c57 100644 --- a/src/large.c +++ b/src/large.c @@ -105,7 +105,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { /* Split excess pages. */ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - &extent_hooks, extent, usize + sz_large_pad, + extent_hooks, extent, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; @@ -116,7 +116,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { extent_size_get(trail)); } - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, trail); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, trail); } arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; extent_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, &extent_hooks, + if ((trail = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, &extent_hooks, + || (trail = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, + if ((trail = extent_alloc_wrapper(tsdn, arena, extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; @@ -169,8 +169,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { - extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); + if (extent_merge_wrapper(tsdn, arena, extent_hooks, extent, trail)) { + extent_dalloc_wrapper(tsdn, arena, extent_hooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; @@ -339,8 
+339,8 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, extent); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, extent); } void -- cgit v0.12 From 837119a9489992e1c4326015ae21e16c246ed094 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 14:43:48 -0800 Subject: base_structs.h: Remove some mid-line tabs. --- include/jemalloc/internal/base_structs.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index cc0f9a5..a3429d6 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -9,52 +9,52 @@ /* Embedded at the beginning of every block of base-managed virtual memory. */ struct base_block_s { /* Total size of block's virtual memory mapping. */ - size_t size; + size_t size; /* Next block in list of base's blocks. */ - base_block_t *next; + base_block_t *next; /* Tracks unused trailing space. */ - extent_t extent; + extent_t extent; }; struct base_s { /* Associated arena's index within the arenas array. */ - unsigned ind; + unsigned ind; /* * User-configurable extent hook functions. Points to an * extent_hooks_t. */ - atomic_p_t extent_hooks; + atomic_p_t extent_hooks; /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; + malloc_mutex_t mtx; /* Using THP when true (metadata_thp auto mode). */ - bool auto_thp_switched; + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. Logarithmic spacing between subsequent allocations ensures * that the total number of distinct mappings remains small. */ - pszind_t pind_last; + pszind_t pind_last; /* Serial number generation state. */ - size_t extent_sn_next; + size_t extent_sn_next; /* Chain of all blocks associated with base. */ - base_block_t *blocks; + base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[SC_NSIZES]; + extent_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. */ - size_t allocated; - size_t resident; - size_t mapped; + size_t allocated; + size_t resident; + size_t mapped; /* Number of THP regions touched. 
*/ - size_t n_thp; + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ -- cgit v0.12 From ba8b9ecbcbda3b975711e4bced4647afaa50c71e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 10:44:09 -0800 Subject: Add ehooks module --- Makefile.in | 1 + include/jemalloc/internal/ehooks.h | 4 ++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/ehooks.c | 3 +++ 5 files changed, 10 insertions(+) create mode 100644 include/jemalloc/internal/ehooks.h create mode 100644 src/ehooks.c diff --git a/Makefile.in b/Makefile.in index 0bbf106..a735e0e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h new file mode 100644 index 0000000..695859d --- /dev/null +++ b/include/jemalloc/internal/ehooks.h @@ -0,0 +1,4 @@ +#ifndef JEMALLOC_INTERNAL_EHOOKS_H +#define JEMALLOC_INTERNAL_EHOOKS_H + +#endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 5838e93..e680312 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b9d4f68..ce51930 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/ehooks.c b/src/ehooks.c new file mode 100644 index 0000000..454cb47 --- /dev/null +++ b/src/ehooks.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + -- cgit v0.12 From ae0d8e8591f749ee8fbe1d732984a63f900aaea3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 14:19:22 -0800 Subject: Move extent ehook calls into ehooks --- include/jemalloc/internal/arena_externs.h | 4 +- include/jemalloc/internal/arena_inlines_b.h | 9 +- include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 6 +- include/jemalloc/internal/ehooks.h | 147 +++++++++ include/jemalloc/internal/extent_externs.h | 61 ++-- src/arena.c | 93 +++--- src/base.c | 69 ++-- src/ctl.c | 5 +- src/ehooks.c | 5 + src/extent.c | 471 +++++++++++++--------------- src/extent_dss.c | 7 +- src/large.c | 36 +-- 13 files changed, 511 insertions(+), 404 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c13d828..b6b33ce 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); + ehooks_t *ehooks, extent_t *extent); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif @@ -72,7 +72,7 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); 
dss_prec_t arena_dss_prec_get(arena_t *arena); -extent_hooks_t *arena_get_extent_hooks(arena_t *arena); +ehooks_t *arena_get_ehooks(arena_t *arena); extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 9ccfaa9..16da67e 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -14,11 +14,6 @@ arena_get_from_extent(extent_t *extent) { ATOMIC_RELAXED); } -JEMALLOC_ALWAYS_INLINE bool -arena_has_default_hooks(arena_t *arena) { - return (arena_get_extent_hooks(arena) == &extent_hooks_default); -} - JEMALLOC_ALWAYS_INLINE arena_t * arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { if (arena != NULL) { @@ -134,10 +129,10 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. */ JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t *extent_hooks, +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_t *extent) { size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); if (config_stats) { /* Update stats accordingly. */ arena_stats_lock(tsdn, &arena->stats); diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 7b705c9..35734c3 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -7,7 +7,7 @@ extern const char *metadata_thp_mode_names[]; base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); -extent_hooks_t *base_extent_hooks_get(base_t *base); +ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index a3429d6..68e7896 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H #define JEMALLOC_INTERNAL_BASE_STRUCTS_H +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -23,10 +24,9 @@ struct base_s { unsigned ind; /* - * User-configurable extent hook functions. Points to an - * extent_hooks_t. + * User-configurable extent hook functions. */ - atomic_p_t extent_hooks; + ehooks_t ehooks; /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 695859d..c79ea24 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,4 +1,151 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/atomic.h" + +extern const extent_hooks_t extent_hooks_default; + +typedef struct ehooks_s ehooks_t; +struct ehooks_s { + /* Logically an extent_hooks_t *. 
*/ + atomic_p_t ptr; +}; + +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); + +static inline void +ehooks_set_extent_hooks_ptr(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { + atomic_store_p(&ehooks->ptr, extent_hooks, ATOMIC_RELEASE); +} + +static inline extent_hooks_t * +ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { + return (extent_hooks_t *)atomic_load_p(&ehooks->ptr, ATOMIC_ACQUIRE); +} + +static inline bool +ehooks_are_default(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks) == &extent_hooks_default; +} + +static inline bool +ehooks_destroy_is_noop(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->destroy == NULL; +} + +static inline bool +ehooks_purge_lazy_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->purge_lazy == NULL; +} + +static inline bool +ehooks_purge_forced_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->purge_forced == NULL; +} + +static inline bool +ehooks_split_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; +} + +static inline bool +ehooks_merge_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; +} + +static inline void * +ehooks_alloc(ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + return extent_hooks->alloc(extent_hooks, new_addr, size, alignment, + zero, commit, arena_ind); +} + +static inline bool +ehooks_dalloc(ehooks_t *ehooks, void *addr, size_t size, bool committed, + unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->dalloc == NULL) { + return true; + } + return extent_hooks->dalloc(extent_hooks, addr, size, committed, + arena_ind); +} + +static inline void +ehooks_destroy(ehooks_t *ehooks, void *addr, size_t size, bool committed, + unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->destroy == NULL) { + return; + } + extent_hooks->destroy(extent_hooks, addr, size, committed, arena_ind); +} + +static inline bool +ehooks_commit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->commit == NULL) { + return true; + } + return extent_hooks->commit(extent_hooks, addr, size, offset, length, + arena_ind); +} + +static inline bool +ehooks_decommit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->decommit == NULL) { + return true; + } + return extent_hooks->decommit(extent_hooks, addr, size, offset, length, + arena_ind); +} + +static inline bool +ehooks_purge_lazy(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->purge_lazy == NULL) { + return true; + } + return extent_hooks->purge_lazy(extent_hooks, addr, size, offset, + length, arena_ind); +} + +static inline bool +ehooks_purge_forced(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->purge_forced == NULL) { + return true; + } + return 
extent_hooks->purge_forced(extent_hooks, addr, size, offset, + length, arena_ind); +} + +static inline bool +ehooks_split(ehooks_t *ehooks, void *addr, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->split == NULL) { + return true; + } + return extent_hooks->split(extent_hooks, addr, size, size_a, size_b, + committed, arena_ind); +} + +static inline bool +ehooks_merge(ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, + size_t size_b, bool committed, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->merge == NULL) { + return true; + } + return extent_hooks->merge(extent_hooks, addr_a, size_a, addr_b, size_b, + committed, arena_ind); +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 218ca94..26828ba 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" @@ -9,7 +10,6 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); @@ -18,39 +18,34 @@ void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit); -void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent); -extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, size_t npages_min); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t 
*arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/src/arena.c b/src/arena.c index a272438..214a97c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/div.h" +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -252,13 +253,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } void -arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_dirty, - extent); + extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -426,7 +426,7 @@ arena_may_have_muzzy(arena_t *arena) { extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -434,19 +434,18 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, extent_hooks, + extent_t *extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, 
extent_hooks, - &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, - false, szind, zero, &commit); + extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + NULL, usize, sz_large_pad, alignment, false, szind, zero, + &commit); } size_t size = usize + sz_large_pad; if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, - usize, sz_large_pad, alignment, false, szind, zero, - &commit); + extent = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, + sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { /* * extent may be NULL on OOM, but in that case @@ -819,8 +818,8 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_limit, - size_t npages_decay_max, extent_list_t *decay_extents) { + ehooks_t *ehooks, eset_t *eset, size_t npages_limit, + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -828,8 +827,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, extent_hooks, eset, - npages_limit)) != NULL) { + (extent = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -837,9 +836,9 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, } static size_t -arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, arena_decay_t *decay, eset_t *eset, - bool all, extent_list_t *decay_extents, bool is_background_thread) { +arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, + bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -864,9 +863,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - extent_hooks, extent, 0, + ehooks, extent, 0, extent_size_get(extent))) { - extents_dalloc(tsdn, arena, extent_hooks, + extents_dalloc(tsdn, arena, ehooks, &arena->eset_muzzy, extent); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -874,8 +873,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, extent_hooks, - extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); if (config_stats) { nunmapped += npages; } @@ -923,17 +921,16 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); extent_list_t decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, extent_hooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - size_t npurged = arena_decay_stashed(tsdn, arena, - extent_hooks, decay, eset, all, &decay_extents, - is_background_thread); + size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, + eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -1006,8 +1003,8 @@ static void 
arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, slab); + ehooks_t *ehooks = arena_get_ehooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); } static void @@ -1159,11 +1156,11 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * destroyed, or provide custom extent hooks that track retained * dss-based extents for later reuse. */ - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); extent_t *extent; - while ((extent = extents_evict(tsdn, arena, extent_hooks, + while ((extent = extents_evict(tsdn, arena, ehooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, extent_hooks, extent); + extent_destroy_wrapper(tsdn, arena, ehooks, extent); } } @@ -1204,9 +1201,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { } static extent_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, const bin_info_t *bin_info, - szind_t szind) { +arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1215,7 +1211,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; - slab = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,21 +1228,20 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, - binind, &zero, &commit); + extent_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, - true, binind, &zero, &commit); + slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, + &commit); } if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, extent_hooks, - bin_info, szind); + slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, + szind); if (slab == NULL) { return NULL; } @@ -1846,9 +1841,9 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return ret; } -extent_hooks_t * -arena_get_extent_hooks(arena_t *arena) { - return base_extent_hooks_get(arena->base); +ehooks_t * +arena_get_ehooks(arena_t *arena) { + return base_ehooks_get(arena->base); } extent_hooks_t * diff --git a/src/base.c b/src/base.c index 9a55ed2..92dfca8 100644 --- a/src/base.c +++ b/src/base.c @@ -29,7 +29,7 @@ metadata_thp_madvise(void) { } static void * -base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { +base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; @@ -37,7 
+37,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); size_t alignment = HUGEPAGE; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); if (have_madvise_huge && addr) { pages_set_thp_state(addr, size); @@ -46,8 +46,8 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, - &zero, &commit, ind); + addr = ehooks_alloc(ehooks, NULL, size, alignment, &zero, + &commit, ind); post_reentrancy(tsd); } @@ -55,7 +55,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) } static void -base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, +base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, size_t size) { /* * Cascade through dalloc, decommit, purge_forced, and purge_lazy, @@ -67,7 +67,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, * may in fact want the end state of all associated virtual memory to be * in some consistent-but-allocated state. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { if (!extent_dalloc_mmap(addr, size)) { goto label_done; } @@ -85,24 +85,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, } else { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - if (extent_hooks->dalloc != NULL && - !extent_hooks->dalloc(extent_hooks, addr, size, true, - ind)) { + if (!ehooks_dalloc(ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } - if (extent_hooks->decommit != NULL && - !extent_hooks->decommit(extent_hooks, addr, size, 0, size, - ind)) { + if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (extent_hooks->purge_forced != NULL && - !extent_hooks->purge_forced(extent_hooks, addr, size, 0, - size, ind)) { + if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (extent_hooks->purge_lazy != NULL && - !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_lazy(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ @@ -248,8 +240,8 @@ base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, * On success a pointer to the initialized base_block_t header is returned. */ static base_block_t * -base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, - unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, + pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -270,7 +262,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size : next_block_size; - base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind, + base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, block_size); if (block == NULL) { return NULL; @@ -311,13 +303,13 @@ static extent_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); - extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + ehooks_t *ehooks = base_ehooks_get(base); /* * Drop mutex during base_block_alloc(), because an extent hook will be * called. */ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, ehooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -353,7 +345,16 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, + + /* + * The base will contain the ehooks eventually, but it itself is + * allocated using them. So we use some stack ehooks to bootstrap its + * memory, and then initialize the ehooks within the base_t. + */ + ehooks_t fake_ehooks; + ehooks_init(&fake_ehooks, extent_hooks); + + base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; @@ -365,10 +366,10 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent, &gap_size, base_size, base_alignment); base->ind = ind; - atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); + ehooks_init(&base->ehooks, extent_hooks); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { - base_unmap(tsdn, extent_hooks, ind, block, block->size); + base_unmap(tsdn, &fake_ehooks, ind, block, block->size); return NULL; } base->pind_last = pind_last; @@ -397,26 +398,26 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { void base_delete(tsdn_t *tsdn, base_t *base) { - extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + ehooks_t *ehooks = base_ehooks_get(base); base_block_t *next = base->blocks; do { base_block_t *block = next; next = block->next; - base_unmap(tsdn, extent_hooks, base_ind_get(base), block, + base_unmap(tsdn, ehooks, base_ind_get(base), block, block->size); } while (next != NULL); } -extent_hooks_t * -base_extent_hooks_get(base_t *base) { - return (extent_hooks_t *)atomic_load_p(&base->extent_hooks, - ATOMIC_ACQUIRE); +ehooks_t * +base_ehooks_get(base_t *base) { + return &base->ehooks; } extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { - extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base); - atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE); + extent_hooks_t *old_extent_hooks = + ehooks_get_extent_hooks_ptr(&base->ehooks); + ehooks_init(&base->ehooks, extent_hooks); return old_extent_hooks; } diff --git a/src/ctl.c b/src/ctl.c index c2f1270..9b88f40 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2400,8 +2400,9 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = arena_get_extent_hooks( - arena); + old_extent_hooks = + ehooks_get_extent_hooks_ptr( 
+ arena_get_ehooks(arena)); READ(old_extent_hooks, extent_hooks_t *); } } diff --git a/src/ehooks.c b/src/ehooks.c index 454cb47..0f59f33 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -1,3 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ehooks.h" + +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { + ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); +} diff --git a/src/extent.c b/src/extent.c index d21a1e8..23194e1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -28,40 +28,38 @@ static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); -static bool extent_decommit_default(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained); +static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, - bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b, bool growing_retained); -const extent_hooks_t extent_hooks_default = { +const extent_hooks_t extent_hooks_default = { extent_alloc_default, extent_dalloc_default, extent_destroy_default, @@ -97,16 +95,14 @@ static atomic_zu_t highpages; */ static void 
extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, - size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit, bool growing_retained); +static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, - bool growing_retained); + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, + bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ @@ -224,13 +220,12 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent) { +extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, &coalesced, false); + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + extent, &coalesced, false); extent_state_set(extent, eset_state_get(eset)); if (!coalesced) { @@ -241,23 +236,23 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, } extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, eset, - new_addr, size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent) { +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); assert(extent_dumpable_get(extent)); @@ -267,12 +262,12 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, extent_hooks, eset, extent, false); + 
extent_record(tsdn, arena, ehooks, eset, extent, false); } extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, size_t npages_min) { +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -301,8 +296,8 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, extent_hooks, - rtree_ctx, eset, extent)) { + if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent)) { break; } /* @@ -339,8 +334,8 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -350,11 +345,10 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, * that this is only a virtual memory leak. */ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, extent_hooks, - extent, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, extent_hooks, - extent, 0, extent_size_get(extent), - growing_retained); + if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + growing_retained)) { + extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained); } } extent_dalloc(tsdn, arena, extent); @@ -581,10 +575,9 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { * given allocation request. */ static extent_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool growing_retained) { +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -668,8 +661,8 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, +extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ extent_t **extent, extent_t **lead, extent_t **trail, /* The mess to clean up, in case of error. */ @@ -693,9 +686,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the lead. */ if (leadsize != 0) { *lead = *extent; - *extent = extent_split_impl(tsdn, arena, extent_hooks, - *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, - slab, growing_retained); + *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, + growing_retained); if (*extent == NULL) { *to_leak = *lead; *lead = NULL; @@ -705,9 +698,8 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, extent_hooks, *extent, - esize, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *extent; *to_salvage = *lead; @@ -745,18 +737,18 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, * and put back into eset. */ static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + bool growing_retained) { extent_t *lead; extent_t *trail; extent_t *to_leak; extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, - &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -790,8 +782,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, extent_hooks, eset, - to_leak, growing_retained); + extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); } @@ -807,7 +799,7 @@ extent_need_manual_zero(arena_t *arena) { * default extent hooks installed (in which case the purge semantics may * change); or 2) transparent huge pages enabled. */ - return (!arena_has_default_hooks(arena) || + return (!ehooks_are_default(arena_get_ehooks(arena)) || (opt_thp == thp_mode_always)); } @@ -816,10 +808,9 @@ extent_need_manual_zero(arena_t *arena) { * in the given eset_t. */ static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, - bool growing_retained) { +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(new_addr == NULL || !slab); @@ -829,25 +820,25 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, extent_hooks, + extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, extent_hooks, rtree_ctx, - eset, new_addr, size, pad, alignment, slab, szind, extent, + extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { return NULL; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, extent_hooks, extent, - 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, extent_hooks, eset, - extent, growing_retained); + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, extent, + growing_retained); return NULL; } if (!extent_need_manual_zero(arena)) { @@ -985,9 +976,9 @@ extent_hook_post_reentrancy(tsdn_t *tsdn) { * virtual memory ranges retained by each arena. */ static extent_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit) { +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -1022,14 +1013,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, bool committed = false; void *ptr; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { ptr = extent_alloc_default_impl(tsdn, arena, NULL, alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); - ptr = extent_hooks->alloc(extent_hooks, NULL, - alloc_size, PAGE, &zeroed, &committed, - arena_ind_get(arena)); + ptr = ehooks_alloc(ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1060,18 +1050,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_t *trail; extent_t *to_leak; extent_t *to_salvage; - extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, - &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, - true); + extent_split_interior_result_t result = extent_split_interior(tsdn, + arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, trail, true); } } else { @@ -1084,21 +1073,21 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, 
to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, extent_hooks, + extents_abandon_vm(tsdn, arena, ehooks, &arena->eset_retained, to_leak, true); } goto label_err; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, extent_size_get(extent), true)) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, true); goto label_err; } @@ -1151,15 +1140,15 @@ label_err: } static extent_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, + extent_t *extent = extent_recycle(tsdn, arena, ehooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { @@ -1168,8 +1157,8 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, extent_hooks, size, - pad, alignment, slab, szind, zero, commit); + extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1180,9 +1169,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, } static extent_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; extent_t *extent = extent_alloc(tsdn, arena); if (extent == NULL) { @@ -1190,14 +1179,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); - addr = extent_hooks->alloc(extent_hooks, new_addr, - esize, palignment, zero, commit, arena_ind_get(arena)); + addr = ehooks_alloc(ehooks, new_addr, esize, palignment, zero, + commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { @@ -1219,14 +1208,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_alloc_retained(tsdn, arena, extent_hooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); + extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -1237,7 +1226,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, + extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } @@ -1266,15 +1255,14 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *inner, extent_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); extent_activate_locked(tsdn, arena, eset, outer); malloc_mutex_unlock(tsdn, &eset->mtx); - bool err = extent_merge_impl(tsdn, arena, extent_hooks, + bool err = extent_merge_impl(tsdn, arena, ehooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &eset->mtx); @@ -1286,10 +1274,9 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static extent_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained, - bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -1318,7 +1305,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - extent_hooks, eset, extent, next, true, + ehooks, eset, extent, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -1338,7 +1325,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - extent_hooks, eset, extent, prev, false, + ehooks, eset, extent, prev, false, growing_retained)) { extent = prev; if (eset->delay_coalesce) { @@ -1358,19 +1345,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, } static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, false); } static extent_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, true); } /* @@ -1378,8 +1365,8 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, * given eset_t (coalesces, deregisters slab interiors, the heap operations). */ static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent, bool growing_retained) { +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1399,22 +1386,22 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, (uintptr_t)extent_base_get(extent), true) == extent); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, extent_hooks, - rtree_ctx, eset, extent, NULL, growing_retained); + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. */ bool coalesced; do { assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, - extent_hooks, rtree_ctx, eset, extent, - &coalesced, growing_retained); + extent = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, extent, &coalesced, + growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, extent_hooks, extent); + arena_decay_extent(tsdn, arena, ehooks, extent); return; } } @@ -1425,7 +1412,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1434,7 +1421,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc(tsdn, arena, extent); return; } - extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); } static bool @@ -1458,8 +1445,8 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { bool err; assert(extent_base_get(extent) != NULL); @@ -1470,16 +1457,15 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); /* Try to deallocate. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. */ err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = (extent_hooks->dalloc == NULL || - extent_hooks->dalloc(extent_hooks, - extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena_ind_get(arena))); + err = ehooks_dalloc(ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1491,51 +1477,48 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, } void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, extent_hooks, - extent)) { + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { return; } extent_reregister(tsdn, extent); } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; if (!extent_committed_get(extent)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, extent_hooks, extent, - 0, extent_size_get(extent))) { + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent))) { zeroed = true; - } else if (extent_hooks->purge_forced != NULL && - !extent_hooks->purge_forced(extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena))) { + } else if (!ehooks_purge_forced(ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - (extent_hooks->purge_lazy != NULL && - !extent_hooks->purge_lazy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena)))) { + !ehooks_purge_lazy(ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { zeroed = false; } else { zeroed = false; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_zeroed_set(extent, zeroed); @@ -1544,8 +1527,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, - extent, false); + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, + false); } static void @@ -1562,8 +1545,8 @@ extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1575,15 +1558,15 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); /* Try to destroy; silently fail otherwise. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. */ extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); - } else if (extent_hooks->destroy != NULL) { + } else if (!ehooks_destroy_is_noop(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); - extent_hooks->destroy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena_ind_get(arena)); + ehooks_destroy(ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1598,19 +1581,17 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (extent_hooks->commit == NULL || - extent_hooks->commit(extent_hooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena))); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_commit(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) || !err); @@ -1618,11 +1599,11 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, extent_hooks, extent, offset, - length, false); + return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + false); } static bool @@ -1633,20 +1614,17 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { +extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (extent_hooks->decommit == NULL || - extent_hooks->decommit(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena))); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_decommit(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) && err); @@ -1668,22 +1646,20 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, #endif static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - if (extent_hooks->purge_lazy == NULL) { + if (ehooks_purge_lazy_will_fail(ehooks)) { return true; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->purge_lazy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_purge_lazy(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } @@ -1691,11 +1667,10 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { - return extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, - offset, length, false); +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + length, false); } #ifdef PAGES_CAN_PURGE_FORCED @@ -1713,32 +1688,29 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, #endif static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - if (extent_hooks->purge_forced == NULL) { + if (ehooks_purge_forced_will_fail(ehooks)) { return true; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->purge_forced(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_purge_forced(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } return err; } bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { - return extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, extent, offset, length, false); } @@ -1765,15 +1737,14 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, * and returns the trail (except in case of error). */ static extent_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - if (extent_hooks->split == NULL) { + if (ehooks_split_will_fail(ehooks)) { return NULL; } @@ -1815,13 +1786,13 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->split(extent_hooks, extent_base_get(extent), + bool err = ehooks_split(ehooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } if (err) { @@ -1848,11 +1819,11 @@ label_error_a: } extent_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, extent_hooks, extent, size_a, - szind_a, slab_a, size_b, szind_b, slab_b, false); +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); } static bool @@ -1914,28 +1885,26 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, - bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, + extent_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(extent_base_get(a) < extent_base_get(b)); - if (extent_hooks->merge == NULL || extent_head_no_merge(a, b)) { + if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { return true; } bool err; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. 
*/ err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = extent_hooks->merge(extent_hooks, - extent_base_get(a), extent_size_get(a), extent_base_get(b), - extent_size_get(b), extent_committed_get(a), - arena_ind_get(arena)); + err = ehooks_merge(ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1991,9 +1960,9 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, extent_hooks, a, b, false); +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, ehooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index dd80a19..59e7e7d 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -194,9 +194,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - extent_hooks_t *extent_hooks = - arena_get_extent_hooks(arena); extent_t extent; + ehooks_t *ehooks = arena_get_ehooks( + arena); extent_init(&extent, arena_ind_get(arena), ret, size, @@ -204,8 +204,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, extent_hooks, &extent, 0, - size)) { + arena, ehooks, &extent, 0, size)) { memset(ret, 0, size); } } diff --git a/src/large.c b/src/large.c index 6de1c57..6fd21be 100644 --- a/src/large.c +++ b/src/large.c @@ -93,20 +93,20 @@ static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); assert(oldusize > usize); - if (extent_hooks->split == NULL) { + if (ehooks_split_will_fail(ehooks)) { return true; } /* Split excess pages. 
*/ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - extent_hooks, extent, usize + sz_large_pad, - sz_size2index(usize), false, diff, SC_NSIZES, false); + ehooks, extent, usize + sz_large_pad, sz_size2index(usize), + false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } @@ -116,7 +116,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { extent_size_get(trail)); } - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, trail); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -129,10 +129,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); size_t trailsize = usize - oldusize; - if (extent_hooks->merge == NULL) { + if (ehooks_merge_will_fail(ehooks)) { return true; } @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; extent_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_dirty, extent_past_get(extent), trailsize, 0, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { + if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + &is_zeroed_trail, &commit)) != NULL + || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = extent_alloc_wrapper(tsdn, arena, extent_hooks, + if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; @@ -169,8 +169,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, extent_hooks, extent, trail)) { - extent_dalloc_wrapper(tsdn, arena, extent_hooks, trail); + if (extent_merge_wrapper(tsdn, arena, ehooks, extent, trail)) { + extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; @@ -339,8 +339,8 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, extent); + ehooks_t *ehooks = arena_get_ehooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, extent); } void -- cgit v0.12 From 703fbc0ff584e00899b5b30aa927c55ecc89dabf Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 16:45:40 -0800 Subject: Introduce unsafe reentrancy guards. We have to work to circumvent the safety checks in pre_reentrancy when going down extent hook pathways. Instead, let's explicitly have checked and unchecked guards. 
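
For illustration only (not part of this patch): a minimal sketch, assuming jemalloc-internal types, of how an extent-hook call site can use the unchecked guards directly, while ordinary call sites keep the checked pre_reentrancy()/post_reentrancy() pair and its arena-0 assertion. The helper name call_user_dalloc_hook() is hypothetical; the tsd_* functions are the ones introduced below.

static bool
call_user_dalloc_hook(tsdn_t *tsdn, extent_hooks_t *extent_hooks, void *addr,
    size_t size, bool committed, unsigned arena_ind) {
	/* Hook absent: report failure, matching the convention in extent.c. */
	if (extent_hooks->dalloc == NULL) {
		return true;
	}
	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
	/*
	 * Unchecked guard: skips the "arena != a0" assertion that the checked
	 * pre_reentrancy() keeps for ordinary call sites.
	 */
	tsd_pre_reentrancy_raw(tsd);
	bool err = extent_hooks->dalloc(extent_hooks, addr, size, committed,
	    arena_ind);
	tsd_post_reentrancy_raw(tsd);
	return err;
}
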
--- .../internal/jemalloc_internal_inlines_a.h | 16 ++----------- include/jemalloc/internal/tsd.h | 26 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index fedbd86..98a6478 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -151,24 +151,12 @@ static inline void pre_reentrancy(tsd_t *tsd, arena_t *arena) { /* arena is the current context. Reentry from a0 is not allowed. */ assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); - - bool fast = tsd_fast(tsd); - assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); - ++*tsd_reentrancy_levelp_get(tsd); - if (fast) { - /* Prepare slow path for reentrancy. */ - tsd_slow_update(tsd); - assert(tsd_state_get(tsd) == tsd_state_nominal_slow); - } + tsd_pre_reentrancy_raw(tsd); } static inline void post_reentrancy(tsd_t *tsd) { - int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); - assert(*reentrancy_level > 0); - if (--*reentrancy_level == 0) { - tsd_slow_update(tsd); - } + tsd_post_reentrancy_raw(tsd); } #endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b7ce7ca..3465a2d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -446,4 +446,30 @@ tsd_state_nocleanup(tsd_t *tsd) { tsd_state_get(tsd) == tsd_state_minimal_initialized; } +/* + * These "raw" tsd reentrancy functions don't have any debug checking to make + * sure that we're not touching arena 0. Better is to call pre_reentrancy and + * post_reentrancy if this is possible. + */ +static inline void +tsd_pre_reentrancy_raw(tsd_t *tsd) { + bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); + ++*tsd_reentrancy_levelp_get(tsd); + if (fast) { + /* Prepare slow path for reentrancy. */ + tsd_slow_update(tsd); + assert(tsd_state_get(tsd) == tsd_state_nominal_slow); + } +} + +static inline void +tsd_post_reentrancy_raw(tsd_t *tsd) { + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); + assert(*reentrancy_level > 0); + if (--*reentrancy_level == 0) { + tsd_slow_update(tsd); + } +} + #endif /* JEMALLOC_INTERNAL_TSD_H */ -- cgit v0.12 From dc8b4e6e13fd2a0497f3ab5c0ba9edb92a64f470 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 16:42:44 -0800 Subject: Extent -> Ehooks: Move alloc hook. --- include/jemalloc/internal/ehooks.h | 33 +++++++++++-- src/base.c | 6 +-- src/ehooks.c | 69 +++++++++++++++++++++++++++ src/extent.c | 96 ++------------------------------------ 4 files changed, 105 insertions(+), 99 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c79ea24..37087ca 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -11,6 +11,26 @@ struct ehooks_s { atomic_p_t ptr; }; +/* NOT PUBLIC. */ +void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); + +static inline void +ehooks_pre_reentrancy(tsdn_t *tsdn) { + tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); + tsd_pre_reentrancy_raw(tsd); +} + +static inline void +ehooks_post_reentrancy(tsdn_t *tsdn) { + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + tsd_post_reentrancy_raw(tsd); +} + +/* PUBLIC. */ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); static inline void @@ -54,11 +74,18 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { } static inline void * -ehooks_alloc(ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, unsigned arena_ind) { +ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - return extent_hooks->alloc(extent_hooks, new_addr, size, alignment, + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_alloc_impl(tsdn, new_addr, size, + alignment, zero, commit, arena_ind); + } + ehooks_pre_reentrancy(tsdn); + void *ret = extent_hooks->alloc(extent_hooks, new_addr, size, alignment, zero, commit, arena_ind); + ehooks_post_reentrancy(tsdn); + return ret; } static inline bool diff --git a/src/base.c b/src/base.c index 92dfca8..4f47438 100644 --- a/src/base.c +++ b/src/base.c @@ -43,12 +43,8 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { pages_set_thp_state(addr, size); } } else { - /* No arena context as we are creating new arenas. */ - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, NULL); - addr = ehooks_alloc(ehooks, NULL, size, alignment, &zero, + addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, &commit, ind); - post_reentrancy(tsd); } return addr; diff --git a/src/ehooks.c b/src/ehooks.c index 0f59f33..ba62b8d 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -2,7 +2,76 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent_mmap.h" void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } + +/* + * If the caller specifies (!*zero), it is still possible to receive zeroed + * memory, in which case *zero is toggled to true. arena_extent_alloc() takes + * advantage of this to avoid demanding zeroed extents, but taking advantage of + * them if they are returned. + */ +static void * +extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { + void *ret; + + assert(size != 0); + assert(alignment != 0); + + /* "primary" dss. */ + if (have_dss && dss_prec == dss_prec_primary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) { + return ret; + } + /* mmap. */ + if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) + != NULL) { + return ret; + } + /* "secondary" dss. */ + if (have_dss && dss_prec == dss_prec_secondary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) { + return ret; + } + + /* All strategies for allocation failed. 
*/ + return NULL; +} + +void * +ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + arena_t *arena = arena_get(tsdn, arena_ind, false); + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, + ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } + return ret; +} + +void * +ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + tsdn_t *tsdn; + arena_t *arena; + + tsdn = tsdn_fetch(); + arena = arena_get(tsdn, arena_ind, false); + /* + * The arena we're allocating on behalf of must have been initialized + * already. + */ + assert(arena != NULL); + + return ehooks_default_alloc_impl(tsdn, new_addr, size, + ALIGNMENT_CEILING(alignment, PAGE), zero, commit, + arena_ind_get(arena)); +} diff --git a/src/extent.c b/src/extent.c index 23194e1..96547a5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,9 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, @@ -60,7 +57,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); const extent_hooks_t extent_hooks_default = { - extent_alloc_default, + ehooks_default_alloc, extent_dalloc_default, extent_destroy_default, extent_commit_default, @@ -881,72 +878,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return extent; } -/* - * If the caller specifies (!*zero), it is still possible to receive zeroed - * memory, in which case *zero is toggled to true. arena_extent_alloc() takes - * advantage of this to avoid demanding zeroed extents, but taking advantage of - * them if they are returned. - */ -static void * -extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { - void *ret; - - assert(size != 0); - assert(alignment != 0); - - /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { - return ret; - } - /* mmap. */ - if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) - != NULL) { - return ret; - } - /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { - return ret; - } - - /* All strategies for allocation failed. 
*/ - return NULL; -} - -static void * -extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, - ATOMIC_RELAXED)); - if (have_madvise_huge && ret) { - pages_set_thp_state(ret, size); - } - return ret; -} - -static void * -extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = arena_get(tsdn, arena_ind, false); - /* - * The arena we're allocating on behalf of must have been initialized - * already. - */ - assert(arena != NULL); - - return extent_alloc_default_impl(tsdn, arena, new_addr, size, - ALIGNMENT_CEILING(alignment, PAGE), zero, commit); -} - static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -1012,16 +943,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool zeroed = false; bool committed = false; - void *ptr; - if (ehooks_are_default(ehooks)) { - ptr = extent_alloc_default_impl(tsdn, arena, NULL, - alloc_size, PAGE, &zeroed, &committed); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - ptr = ehooks_alloc(ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, @@ -1177,18 +1100,9 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent == NULL) { return NULL; } - void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, - palignment, zero, commit); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - addr = ehooks_alloc(ehooks, new_addr, esize, palignment, zero, - commit, arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + zero, commit, arena_ind_get(arena)); if (addr == NULL) { extent_dalloc(tsdn, arena, extent); return NULL; -- cgit v0.12 From bac8e2e5a65a361dec4598419dd10d2b119e8d24 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 10:47:28 -0800 Subject: Extent -> Ehooks: Move dalloc hook. 
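
This move follows the same shape as the alloc hook above: the wrapper gains a tsdn_t * and the reentrancy guards move into ehooks itself. Condensed from the header hunk below (asserts and jemalloc-internal details elided), the dispatch is:

    static inline bool
    ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
        bool committed, unsigned arena_ind) {
            extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
            if (extent_hooks == &extent_hooks_default) {
                    /* Fast path: call the default impl directly, tsdn in hand. */
                    return ehooks_default_dalloc_impl(addr, size);
            } else if (extent_hooks->dalloc == NULL) {
                    /* Unset user hook: report failure to the caller. */
                    return true;
            } else {
                    /* User hook: wrap it in the raw reentrancy guards. */
                    ehooks_pre_reentrancy(tsdn);
                    bool err = extent_hooks->dalloc(extent_hooks, addr, size,
                        committed, arena_ind);
                    ehooks_post_reentrancy(tsdn);
                    return err;
            }
    }

Call sites such as extent_dalloc_wrapper_try() can then drop their own ehooks_are_default() branching and simply pass tsdn through.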
--- include/jemalloc/internal/ehooks.h | 20 +++++++++++++++----- src/base.c | 2 +- src/ehooks.c | 14 ++++++++++++++ src/extent.c | 32 ++++---------------------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 37087ca..dc03021 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -18,6 +18,10 @@ void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +bool ehooks_default_dalloc_impl(void *addr, size_t size); +bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); + static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -89,14 +93,20 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, } static inline bool -ehooks_dalloc(ehooks_t *ehooks, void *addr, size_t size, bool committed, - unsigned arena_ind) { +ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->dalloc == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_dalloc_impl(addr, size); + } else if (extent_hooks->dalloc == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->dalloc(extent_hooks, addr, size, + committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->dalloc(extent_hooks, addr, size, committed, - arena_ind); } static inline void diff --git a/src/base.c b/src/base.c index 4f47438..52699c5 100644 --- a/src/base.c +++ b/src/base.c @@ -81,7 +81,7 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, } else { tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - if (!ehooks_dalloc(ehooks, addr, size, true, ind)) { + if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { diff --git a/src/ehooks.c b/src/ehooks.c index ba62b8d..9a266ef 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -75,3 +75,17 @@ ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, ALIGNMENT_CEILING(alignment, PAGE), zero, commit, arena_ind_get(arena)); } + +bool +ehooks_default_dalloc_impl(void *addr, size_t size) { + if (!have_dss || !extent_in_dss(addr)) { + return extent_dalloc_mmap(addr, size); + } + return true; +} + +bool +ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + return ehooks_default_dalloc_impl(addr, size); +} diff --git a/src/extent.c b/src/extent.c index 96547a5..676d7ac 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,8 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, @@ -58,7 +56,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, - extent_dalloc_default, + ehooks_default_dalloc, extent_destroy_default, extent_commit_default, extent_decommit_default @@ -1345,20 +1343,6 @@ extent_may_dalloc(void) { } static bool -extent_dalloc_default_impl(void *addr, size_t size) { - if (!have_dss || !extent_in_dss(addr)) { - return extent_dalloc_mmap(addr, size); - } - return true; -} - -static bool -extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { - return extent_dalloc_default_impl(addr, size); -} - -static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent) { bool err; @@ -1371,17 +1355,9 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_set(extent, extent_base_get(extent)); /* Try to deallocate. */ - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - err = extent_dalloc_default_impl(extent_base_get(extent), - extent_size_get(extent)); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - err = ehooks_dalloc(ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); if (!err) { extent_dalloc(tsdn, arena, extent); -- cgit v0.12 From 5459ec9daeea3144e71abb3b0eb9417a56e7ae95 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:05:18 -0800 Subject: Extent -> Ehooks: Move destroy hook. 
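
The wrapper changes mirror dalloc; the visible win is at the call site. Schematically (simplified from the extent_destroy_wrapper() hunk below, with addr/size/committed/ind standing in for the extent_base_get()/extent_size_get()/extent_committed_get()/arena_ind_get() expressions):

    /* Before: the call site picks the implementation and guards reentrancy. */
    if (ehooks_are_default(ehooks)) {
            extent_destroy_default_impl(addr, size);  /* propagate tsdn */
    } else if (!ehooks_destroy_is_noop(ehooks)) {
            extent_hook_pre_reentrancy(tsdn, arena);
            ehooks_destroy(ehooks, addr, size, committed, ind);
            extent_hook_post_reentrancy(tsdn);
    }

    /* After: one call; the wrapper handles all three cases itself. */
    ehooks_destroy(tsdn, ehooks, addr, size, committed, ind);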
--- include/jemalloc/internal/ehooks.h | 18 +++++++++++++----- src/ehooks.c | 14 ++++++++++++++ src/extent.c | 31 ++++--------------------------- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index dc03021..07094d9 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -17,10 +17,12 @@ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); - bool ehooks_default_dalloc_impl(void *addr, size_t size); bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +void ehooks_default_destroy_impl(void *addr, size_t size); +void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -110,13 +112,19 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline void -ehooks_destroy(ehooks_t *ehooks, void *addr, size_t size, bool committed, - unsigned arena_ind) { +ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->destroy == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_destroy_impl(addr, size); + } else if (extent_hooks->destroy == NULL) { return; + } else { + ehooks_pre_reentrancy(tsdn); + extent_hooks->destroy(extent_hooks, addr, size, committed, + arena_ind); + ehooks_post_reentrancy(tsdn); } - extent_hooks->destroy(extent_hooks, addr, size, committed, arena_ind); } static inline bool diff --git a/src/ehooks.c b/src/ehooks.c index 9a266ef..ad6fd24 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -89,3 +89,17 @@ ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { return ehooks_default_dalloc_impl(addr, size); } + +void +ehooks_default_destroy_impl(void *addr, size_t size) { + if (!have_dss || !extent_in_dss(addr)) { + pages_unmap(addr, size); + } +} + +void +ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + ehooks_default_destroy_impl(addr, size); +} + diff --git a/src/extent.c b/src/extent.c index 676d7ac..271fe4a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,8 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, @@ -57,7 +55,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, ehooks_default_dalloc, - extent_destroy_default, + ehooks_default_destroy, extent_commit_default, extent_decommit_default #ifdef PAGES_CAN_PURGE_LAZY @@ -1421,19 +1419,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, false); } -static void -extent_destroy_default_impl(void *addr, size_t 
size) { - if (!have_dss || !extent_in_dss(addr)) { - pages_unmap(addr, size); - } -} - -static void -extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { - extent_destroy_default_impl(addr, size); -} - void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent) { @@ -1448,17 +1433,9 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_set(extent, extent_base_get(extent)); /* Try to destroy; silently fail otherwise. */ - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - extent_destroy_default_impl(extent_base_get(extent), - extent_size_get(extent)); - } else if (!ehooks_destroy_is_noop(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - ehooks_destroy(ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + ehooks_destroy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_dalloc(tsdn, arena, extent); } -- cgit v0.12 From d78fe241acb79ab4b0b7cb5b48d07be8582fc60a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:25:00 -0800 Subject: Extent -> Ehooks: Move commit and decommit hooks. --- include/jemalloc/internal/ehooks.h | 38 ++++++++++++++++++++++++++---------- src/base.c | 2 +- src/ehooks.c | 23 ++++++++++++++++++++++ src/extent.c | 40 ++++---------------------------------- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 07094d9..e9bdca3 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -23,6 +23,12 @@ bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, void ehooks_default_destroy_impl(void *addr, size_t size); void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind); +bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -128,25 +134,37 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_commit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->commit == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_commit_impl(addr, offset, length); + } else if (extent_hooks->commit == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->commit(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->commit(extent_hooks, addr, size, offset, length, - arena_ind); } static inline bool -ehooks_decommit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t 
length, unsigned arena_ind) { +ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->decommit == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_decommit_impl(addr, offset, length); + } else if (extent_hooks->decommit == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->decommit(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->decommit(extent_hooks, addr, size, offset, length, - arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 52699c5..6b88b23 100644 --- a/src/base.c +++ b/src/base.c @@ -84,7 +84,7 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } - if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { diff --git a/src/ehooks.c b/src/ehooks.c index ad6fd24..cb02377 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -103,3 +103,26 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, ehooks_default_destroy_impl(addr, size); } +bool +ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { + return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + return ehooks_default_commit_impl(addr, offset, length); +} + +bool +ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { + return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + return ehooks_default_decommit_impl(addr, offset, length); +} diff --git a/src/extent.c b/src/extent.c index 271fe4a..3eb4961 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,12 +19,8 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); @@ -56,8 +52,8 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, ehooks_default_dalloc, ehooks_default_destroy, - extent_commit_default, - extent_decommit_default + ehooks_default_commit, + ehooks_default_decommit #ifdef PAGES_CAN_PURGE_LAZY , extent_purge_lazy_default @@ -1441,26 +1437,12 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t 
length, unsigned arena_ind) { - return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} - -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_commit(ehooks, extent_base_get(extent), + bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_committed_set(extent, extent_committed_get(extent) || !err); return err; } @@ -1473,27 +1455,13 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, false); } -static bool -extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { - return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} - bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_decommit(ehooks, extent_base_get(extent), + bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_committed_set(extent, extent_committed_get(extent) && err); return err; } -- cgit v0.12 From f83fdf5336b6705bac027cb3f70b6ca4485cb0c1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:26:45 -0800 Subject: Extent: Clean up a comma --- src/extent.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/extent.c b/src/extent.c index 3eb4961..cb01064 100644 --- a/src/extent.c +++ b/src/extent.c @@ -53,22 +53,17 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_dalloc, ehooks_default_destroy, ehooks_default_commit, - ehooks_default_decommit + ehooks_default_decommit, #ifdef PAGES_CAN_PURGE_LAZY - , - extent_purge_lazy_default + extent_purge_lazy_default, #else - , - NULL + NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - , - extent_purge_forced_default + extent_purge_forced_default, #else - , - NULL + NULL, #endif - , extent_split_default, extent_merge_default }; -- cgit v0.12 From 368baa42ef76f1dd44950b5929dc5697c0ac7add Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:59:46 -0800 Subject: Extent -> Ehooks: Move purge_lazy hook. 
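
The purge hooks differ slightly from the earlier moves: the default implementation only exists when the platform supports the operation, so the fast path is guarded by PAGES_CAN_PURGE_LAZY and the default hooks table keeps a NULL entry otherwise. Condensed from the header hunk below:

    static inline bool
    ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
        size_t offset, size_t length, unsigned arena_ind) {
            extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
    #ifdef PAGES_CAN_PURGE_LAZY
            if (extent_hooks == &extent_hooks_default) {
                    return ehooks_default_purge_lazy_impl(addr, offset, length);
            }
    #endif
            if (extent_hooks->purge_lazy == NULL) {
                    /* No hook available: report failure; callers fall back. */
                    return true;
            }
            ehooks_pre_reentrancy(tsdn);
            bool err = extent_hooks->purge_lazy(extent_hooks, addr, size,
                offset, length, arena_ind);
            ehooks_post_reentrancy(tsdn);
            return err;
    }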
--- include/jemalloc/internal/ehooks.h | 22 ++++++++++++++++++---- src/base.c | 3 ++- src/ehooks.c | 18 ++++++++++++++++++ src/extent.c | 35 +++-------------------------------- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index e9bdca3..c234ccd 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -29,6 +29,11 @@ bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +#ifdef PAGES_CAN_PURGE_LAZY +bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind); +#endif static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -168,14 +173,23 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_purge_lazy(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); +#ifdef PAGES_CAN_PURGE_LAZY + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_purge_lazy_impl(addr, offset, length); + } +#endif if (extent_hooks->purge_lazy == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->purge_lazy(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->purge_lazy(extent_hooks, addr, size, offset, - length, arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 6b88b23..48a8c6a 100644 --- a/src/base.c +++ b/src/base.c @@ -90,7 +90,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (!ehooks_purge_lazy(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, + ind)) { goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. 
*/ diff --git a/src/ehooks.c b/src/ehooks.c index cb02377..ae0e980 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -126,3 +126,21 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_decommit_impl(addr, offset, length); } + +#ifdef PAGES_CAN_PURGE_LAZY +bool +ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { + return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + return ehooks_default_purge_lazy_impl(addr, offset, length); +} +#endif diff --git a/src/extent.c b/src/extent.c index cb01064..f3fbe95 100644 --- a/src/extent.c +++ b/src/extent.c @@ -21,10 +21,6 @@ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef PAGES_CAN_PURGE_LAZY -static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -#endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); @@ -55,7 +51,7 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_commit, ehooks_default_decommit, #ifdef PAGES_CAN_PURGE_LAZY - extent_purge_lazy_default, + ehooks_default_purge_lazy, #else NULL, #endif @@ -1390,7 +1386,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(ehooks, extent_base_get(extent), + !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = false; @@ -1461,38 +1457,13 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return err; } -#ifdef PAGES_CAN_PURGE_LAZY -static bool -extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { - assert(addr != NULL); - assert((offset & PAGE_MASK) == 0); - assert(length != 0); - assert((length & PAGE_MASK) == 0); - - return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} -#endif - static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - - if (ehooks_purge_lazy_will_fail(ehooks)) { - return true; - } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_purge_lazy(ehooks, extent_base_get(extent), + bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } - return err; } -- cgit v0.12 From a5b42a1a10048d9562d59e494c9e2cf3ab6943ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 13:11:54 -0800 Subject: Extent -> Ehooks: Move purge_forced hook. --- include/jemalloc/internal/ehooks.h | 22 ++++++++++++++++++---- src/base.c | 3 ++- src/ehooks.c | 18 ++++++++++++++++++ src/extent.c | 34 +++------------------------------- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c234ccd..ae5ef66 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -34,6 +34,11 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +#ifdef PAGES_CAN_PURGE_FORCED +bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +#endif static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -193,14 +198,23 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_purge_forced(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); +#ifdef PAGES_CAN_PURGE_FORCED + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_purge_forced_impl(addr, offset, length); + } +#endif if (extent_hooks->purge_forced == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->purge_forced(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->purge_forced(extent_hooks, addr, size, offset, - length, arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 48a8c6a..92d9bc1 100644 --- a/src/base.c +++ b/src/base.c @@ -87,7 +87,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, + ind)) { goto label_post_reentrancy; } if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, diff --git a/src/ehooks.c b/src/ehooks.c index ae0e980..67ca238 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -144,3 +144,21 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_purge_lazy_impl(addr, offset, length); } #endif + +#ifdef PAGES_CAN_PURGE_FORCED +bool +ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { + return pages_purge_forced((void *)((uintptr_t)addr + + 
(uintptr_t)offset), length); +} + +bool +ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind) { + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + return ehooks_default_purge_forced_impl(addr, offset, length); +} +#endif diff --git a/src/extent.c b/src/extent.c index f3fbe95..f4f3797 100644 --- a/src/extent.c +++ b/src/extent.c @@ -24,10 +24,6 @@ static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef PAGES_CAN_PURGE_FORCED -static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -#endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); @@ -56,7 +52,7 @@ const extent_hooks_t extent_hooks_default = { NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - extent_purge_forced_default, + ehooks_default_purge_forced, #else NULL, #endif @@ -1381,7 +1377,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, extent_size_get(extent))) { zeroed = true; - } else if (!ehooks_purge_forced(ehooks, extent_base_get(extent), + } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; @@ -1474,37 +1470,13 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, length, false); } -#ifdef PAGES_CAN_PURGE_FORCED -static bool -extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(addr != NULL); - assert((offset & PAGE_MASK) == 0); - assert(length != 0); - assert((length & PAGE_MASK) == 0); - - return pages_purge_forced((void *)((uintptr_t)addr + - (uintptr_t)offset), length); -} -#endif - static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - - if (ehooks_purge_forced_will_fail(ehooks)) { - return true; - } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_purge_forced(ehooks, extent_base_get(extent), + bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } return err; } -- cgit v0.12 From 1fff4d2ee3f5ab9d288a2b56544c1c8c4d8736da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 13:48:18 -0800 Subject: Extent -> Ehooks: Move split hook. 
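
Unlike the other defaults, split does no OS work at all: the default implementation is purely a policy check on maps_coalesce and opt_retain (on Windows, MEM_RELEASE can only release whole mappings, so splitting is only safe when extents are retained). Roughly, as added to src/ehooks.c below (written with an explicit void parameter list here):

    bool
    ehooks_default_split_impl(void) {
            if (!maps_coalesce) {
                    /* Only whole regions can be released; allow the split
                     * only when retain keeps us from ever releasing pieces. */
                    return !opt_retain;     /* true == split refused */
            }
            return false;                   /* split always allowed */
    }

Note that the wrapper's fast-path test is spelled ehooks_are_default(ehooks) here, which is equivalent to the pointer comparison against the default table used by the other wrappers.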
--- include/jemalloc/internal/ehooks.h | 19 ++++++++++++++----- src/ehooks.c | 20 ++++++++++++++++++++ src/extent.c | 29 +++-------------------------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index ae5ef66..e84222f 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -39,6 +39,9 @@ bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +bool ehooks_default_split_impl(); +bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -218,14 +221,20 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_split(ehooks_t *ehooks, void *addr, size_t size, size_t size_a, - size_t size_b, bool committed, unsigned arena_ind) { +ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->split == NULL) { + if (ehooks_are_default(ehooks)) { + return ehooks_default_split_impl(); + } else if (extent_hooks->split == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->split(extent_hooks, addr, size, size_a, + size_b, committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->split(extent_hooks, addr, size, size_a, size_b, - committed, arena_ind); } static inline bool diff --git a/src/ehooks.c b/src/ehooks.c index 67ca238..8bd9550 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -162,3 +162,23 @@ ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, return ehooks_default_purge_forced_impl(addr, offset, length); } #endif + +bool +ehooks_default_split_impl() { + if (!maps_coalesce) { + /* + * Without retain, only whole regions can be purged (required by + * MEM_RELEASE on Windows) -- therefore disallow splitting. See + * comments in extent_head_no_merge(). 
+ */ + return !opt_retain; + } + + return false; +} + +bool +ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + return ehooks_default_split_impl(); +} diff --git a/src/extent.c b/src/extent.c index f4f3797..521c0b9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -27,9 +27,6 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t size_a, size_t size_b, bool committed, - unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, @@ -56,7 +53,7 @@ const extent_hooks_t extent_hooks_default = { #else NULL, #endif - extent_split_default, + ehooks_default_split, extent_merge_default }; @@ -1487,21 +1484,6 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, offset, length, false); } -static bool -extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - /* - * Without retain, only whole regions can be purged (required by - * MEM_RELEASE on Windows) -- therefore disallow splitting. See - * comments in extent_head_no_merge(). - */ - return !opt_retain; - } - - return false; -} - /* * Accepts the extent to split, and the characteristics of each side of the * split. The 'a' parameters go with the 'lead' of the resulting pair of @@ -1559,15 +1541,10 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_lock2(tsdn, extent, trail); - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_split(ehooks, extent_base_get(extent), + bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } + if (err) { goto label_error_c; } -- cgit v0.12 From 2fe5108263d013b07572f5aa597ba6ace86ed342 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 14:03:38 -0800 Subject: Extent -> Ehooks: Move merge hook. 
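
The default merge needs more context than the other defaults: when maps_coalesce is false it must look up both extent_t's (via iealloc()) and consult extent_head_no_merge(), which is why that helper loses its static qualifier and gains a declaration in extent_externs.h. Condensed from the hunks below:

    bool
    ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a,
        void *addr_b, size_t size_b, bool committed, unsigned arena_ind) {
            if (!maps_coalesce) {
                    tsdn_t *tsdn = tsdn_fetch();
                    /* Head/tail bits may forbid the merge; see extent_head_no_merge(). */
                    if (extent_head_no_merge(iealloc(tsdn, addr_a),
                        iealloc(tsdn, addr_b))) {
                            return true;
                    }
            }
            /* Shared with the internal fast path: coalescing and DSS checks. */
            return ehooks_default_merge_impl(addr_a, addr_b);
    }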
--- include/jemalloc/internal/ehooks.h | 19 ++++++++---- include/jemalloc/internal/extent_externs.h | 1 + src/ehooks.c | 26 ++++++++++++++++ src/extent.c | 48 ++++-------------------------- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index e84222f..48d13fc 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -42,6 +42,9 @@ bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, bool ehooks_default_split_impl(); bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); +bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -238,14 +241,20 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_merge(ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, - size_t size_b, bool committed, unsigned arena_ind) { +ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->merge == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_merge_impl(addr_a, addr_b); + } else if (extent_hooks->merge == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->merge(extent_hooks, addr_a, size_a, + addr_b, size_b, committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->merge(extent_hooks, addr_a, size_a, addr_b, size_b, - committed, arena_ind); } #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 26828ba..4e3803c 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -46,6 +46,7 @@ extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b); +bool extent_head_no_merge(extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/src/ehooks.c b/src/ehooks.c index 8bd9550..bb32854 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -182,3 +182,29 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { return ehooks_default_split_impl(); } + +bool +ehooks_default_merge_impl(void *addr_a, void *addr_b) { + if (!maps_coalesce && !opt_retain) { + return true; + } + if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { + return true; + } + + return false; +} + +bool +ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + if (!maps_coalesce) { + tsdn_t *tsdn = tsdn_fetch(); + extent_t *a = iealloc(tsdn, addr_a); + extent_t *b = iealloc(tsdn, addr_b); + if (extent_head_no_merge(a, b)) { + return true; + } + } + return ehooks_default_merge_impl(addr_a, addr_b); +} diff --git a/src/extent.c b/src/extent.c index 521c0b9..3e78e96 100644 --- a/src/extent.c +++ 
b/src/extent.c @@ -31,9 +31,6 @@ static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, - size_t size_a, void *addr_b, size_t size_b, bool committed, - unsigned arena_ind); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); @@ -54,7 +51,7 @@ const extent_hooks_t extent_hooks_default = { NULL, #endif ehooks_default_split, - extent_merge_default + ehooks_default_merge }; /* Used exclusively for gdump triggering. */ @@ -1576,23 +1573,11 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, slab_a, size_b, szind_b, slab_b, false); } -static bool -extent_merge_default_impl(void *addr_a, void *addr_b) { - if (!maps_coalesce && !opt_retain) { - return true; - } - if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { - return true; - } - - return false; -} - /* * Returns true if the given extents can't be merged because of their head bit * settings. Assumes the second extent has the higher address. */ -static bool +bool extent_head_no_merge(extent_t *a, extent_t *b) { assert(extent_base_get(a) < extent_base_get(b)); /* @@ -1621,20 +1606,6 @@ extent_head_no_merge(extent_t *a, extent_t *b) { } static bool -extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - tsdn_t *tsdn = tsdn_fetch(); - extent_t *a = iealloc(tsdn, addr_a); - extent_t *b = iealloc(tsdn, addr_b); - if (extent_head_no_merge(a, b)) { - return true; - } - } - return extent_merge_default_impl(addr_a, addr_b); -} - -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1645,18 +1616,9 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, return true; } - bool err; - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - err = extent_merge_default_impl(extent_base_get(a), - extent_base_get(b)); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - err = ehooks_merge(ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); if (err) { return true; -- cgit v0.12 From c8dae890c88162748c22acbc7885c9ebf8012e10 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 14:23:40 -0800 Subject: Extent -> Ehooks: Move over default hooks. 
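
With each hook now implemented in ehooks.c, the default table itself moves there and is renamed from extent_hooks_default to ehooks_default_extent_hooks; the bootstrap call sites switch to the new name. For example, mirroring the base.c and jemalloc.c hunks below:

    bool
    base_boot(tsdn_t *tsdn) {
            b0 = base_new(tsdn, 0,
                (extent_hooks_t *)&ehooks_default_extent_hooks);
            return (b0 == NULL);
    }

    /* ... and arena 0 initialization in malloc_init_hard_a0_locked(): */
    if (arena_init(TSDN_NULL, 0,
        (extent_hooks_t *)&ehooks_default_extent_hooks) == NULL) {
            return true;
    }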
--- include/jemalloc/internal/ehooks.h | 23 ++++++++++++---------- .../internal/jemalloc_internal_inlines_a.h | 2 +- src/base.c | 2 +- src/ctl.c | 4 ++-- src/ehooks.c | 20 +++++++++++++++++++ src/extent.c | 20 ------------------- src/jemalloc.c | 7 ++++--- test/unit/base.c | 3 ++- 8 files changed, 43 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 48d13fc..fbb3713 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/atomic.h" -extern const extent_hooks_t extent_hooks_default; +extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; struct ehooks_s { @@ -11,6 +11,8 @@ struct ehooks_s { atomic_p_t ptr; }; +extern const extent_hooks_t ehooks_default_extent_hooks; + /* NOT PUBLIC. */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); @@ -73,7 +75,8 @@ ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { static inline bool ehooks_are_default(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks) == &extent_hooks_default; + return ehooks_get_extent_hooks_ptr(ehooks) == + &ehooks_default_extent_hooks; } static inline bool @@ -105,7 +108,7 @@ static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, zero, commit, arena_ind); } @@ -120,7 +123,7 @@ static inline bool ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); } else if (extent_hooks->dalloc == NULL) { return true; @@ -137,7 +140,7 @@ static inline void ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_destroy_impl(addr, size); } else if (extent_hooks->destroy == NULL) { return; @@ -153,7 +156,7 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { return true; @@ -170,7 +173,7 @@ static inline bool ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_decommit_impl(addr, offset, length); } else if (extent_hooks->decommit == NULL) { return true; @@ -188,7 +191,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t 
*ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_LAZY - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_lazy_impl(addr, offset, length); } #endif @@ -208,7 +211,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_FORCED - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_forced_impl(addr, offset, length); } #endif @@ -244,7 +247,7 @@ static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_merge_impl(addr_a, addr_b); } else if (extent_hooks->merge == NULL) { return true; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 98a6478..f079e85 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -91,7 +91,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { if (unlikely(ret == NULL)) { if (init_if_missing) { ret = arena_init(tsdn, ind, - (extent_hooks_t *)&extent_hooks_default); + (extent_hooks_t *)&ehooks_default_extent_hooks); } } return ret; diff --git a/src/base.c b/src/base.c index 92d9bc1..a1b45d0 100644 --- a/src/base.c +++ b/src/base.c @@ -511,6 +511,6 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks); return (b0 == NULL); } diff --git a/src/ctl.c b/src/ctl.c index 9b88f40..a9982ca 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2377,7 +2377,7 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } old_extent_hooks = - (extent_hooks_t *)&extent_hooks_default; + (extent_hooks_t *)&ehooks_default_extent_hooks; READ(old_extent_hooks, extent_hooks_t *); if (newp != NULL) { /* Initialize a new arena as a side effect. 
*/ @@ -2581,7 +2581,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - extent_hooks = (extent_hooks_t *)&extent_hooks_default; + extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; WRITE(extent_hooks, extent_hooks_t *); if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { ret = EAGAIN; diff --git a/src/ehooks.c b/src/ehooks.c index bb32854..728783e 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -208,3 +208,23 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } return ehooks_default_merge_impl(addr_a, addr_b); } + +const extent_hooks_t ehooks_default_extent_hooks = { + ehooks_default_alloc, + ehooks_default_dalloc, + ehooks_default_destroy, + ehooks_default_commit, + ehooks_default_decommit, +#ifdef PAGES_CAN_PURGE_LAZY + ehooks_default_purge_lazy, +#else + NULL, +#endif +#ifdef PAGES_CAN_PURGE_FORCED + ehooks_default_purge_forced, +#else + NULL, +#endif + ehooks_default_split, + ehooks_default_merge +}; diff --git a/src/extent.c b/src/extent.c index 3e78e96..e7e4712 100644 --- a/src/extent.c +++ b/src/extent.c @@ -34,26 +34,6 @@ static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); -const extent_hooks_t extent_hooks_default = { - ehooks_default_alloc, - ehooks_default_dalloc, - ehooks_default_destroy, - ehooks_default_commit, - ehooks_default_decommit, -#ifdef PAGES_CAN_PURGE_LAZY - ehooks_default_purge_lazy, -#else - NULL, -#endif -#ifdef PAGES_CAN_PURGE_FORCED - ehooks_default_purge_forced, -#else - NULL, -#endif - ehooks_default_split, - ehooks_default_merge -}; - /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; static atomic_zu_t highpages; diff --git a/src/jemalloc.c b/src/jemalloc.c index 4fc1a5e..825a8ed 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -589,7 +589,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) { choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), choose[j], - (extent_hooks_t *)&extent_hooks_default); + (extent_hooks_t *) + &ehooks_default_extent_hooks); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); @@ -1589,8 +1590,8 @@ malloc_init_hard_a0_locked() { * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) - == NULL) { + if (arena_init(TSDN_NULL, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks) == NULL) { return true; } a0 = arena_get(TSDN_NULL, 0, false); diff --git a/test/unit/base.c b/test/unit/base.c index 6b792cf..7ced15f 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -31,7 +31,8 @@ TEST_BEGIN(test_base_hooks_default) { size_t allocated0, allocated1, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + base = base_new(tsdn, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, -- cgit v0.12 From 39fdc690a0d3a49c1e36d79f625350426480b18f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 17:03:31 -0800 Subject: Ehooks comments and cleanup. 
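
Two mechanical notes on the cleanup: the extent_hooks_t-shaped entry points (ehooks_default_alloc() and friends) become static to src/ehooks.c, leaving only the *_impl fast paths visible in the header, and the will-fail predicates that no caller needs before committing resources (destroy, purge) go away, while ehooks_split_will_fail() stays for callers that must allocate before they can even attempt the hook. An illustrative guard of that kind, assuming a caller shaped like extent_split_impl() (the trail allocation is a sketch, not the exact code):

    if (ehooks_split_will_fail(ehooks)) {
            /* The hook is unset, so the split cannot succeed; don't bother
             * allocating a trail extent_t only to throw it away. */
            return NULL;
    }
    extent_t *trail = extent_alloc(tsdn, arena);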
--- include/jemalloc/internal/ehooks.h | 73 +++++++++++++++++++------------------- src/ehooks.c | 18 +++++----- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index fbb3713..97c3f44 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,6 +1,13 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +/* + * This module is the internal interface to the extent hooks (both + * user-specified and external). Eventually, this will give us the flexibility + * to use multiple different versions of user-visible extent-hook APIs under a + * single user interface. + */ + #include "jemalloc/internal/atomic.h" extern const extent_hooks_t ehooks_default_extent_hooks; @@ -13,41 +20,45 @@ struct ehooks_s { extern const extent_hooks_t ehooks_default_extent_hooks; -/* NOT PUBLIC. */ +/* + * These are not really part of the public API. Each hook has a fast-path for + * the default-hooks case that can avoid various small inefficiencies: + * - Forgetting tsd and then calling tsd_get within the hook. + * - Getting more state than necessary out of the extent_t. + * - Doing arena_ind -> arena -> arena_ind lookups. + * By making the calls to these functions visible to the compiler, it can move + * those extra bits of computation down below the fast-paths where they get ignored. + */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); bool ehooks_default_dalloc_impl(void *addr, size_t size); -bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); void ehooks_default_destroy_impl(void *addr, size_t size); -void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); #endif #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); #endif bool ehooks_default_split_impl(); -bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind); bool ehooks_default_merge_impl(void *addr_a, void *addr_b); -bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind); +/* + * We don't officially support reentrancy from within the extent hooks.
But + * various people who sit within throwing distance of the jemalloc team want + * that functionality in certain limited cases. The default reentrancy guards + * assert that we're not reentrant from a0 (since it's the bootstrap arena, + * where reentrant allocations would be redirected), which we would incorrectly + * trigger in cases where a0 has extent hooks (those hooks themselves can't be + * reentrant, then, but there are reasonable uses for such functionality, like + * putting internal metadata on hugepages). Therefore, we use the raw + * reentrancy guards. + * + * Eventually, we need to think more carefully about whether and where we + * support allocating from within extent hooks (and what that means for things + * like profiling, stats collection, etc.), and document what the guarantee is. + */ static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -60,7 +71,7 @@ ehooks_post_reentrancy(tsdn_t *tsdn) { tsd_post_reentrancy_raw(tsd); } -/* PUBLIC. */ +/* Beginning of the public API. */ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); static inline void @@ -79,21 +90,11 @@ ehooks_are_default(ehooks_t *ehooks) { &ehooks_default_extent_hooks; } -static inline bool -ehooks_destroy_is_noop(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->destroy == NULL; -} - -static inline bool -ehooks_purge_lazy_will_fail(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->purge_lazy == NULL; -} - -static inline bool -ehooks_purge_forced_will_fail(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->purge_forced == NULL; -} - +/* + * In some cases, a caller needs to allocate resources before attempting to call + * a hook. If that hook is doomed to fail, this is wasteful. We therefore + * include some checks for such cases. 
+ */ static inline bool ehooks_split_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; diff --git a/src/ehooks.c b/src/ehooks.c index 728783e..d7d1613 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -57,7 +57,7 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, return ret; } -void * +static void * ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { tsdn_t *tsdn; @@ -84,7 +84,7 @@ ehooks_default_dalloc_impl(void *addr, size_t size) { return true; } -bool +static bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { return ehooks_default_dalloc_impl(addr, size); @@ -97,7 +97,7 @@ ehooks_default_destroy_impl(void *addr, size_t size) { } } -void +static void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { ehooks_default_destroy_impl(addr, size); @@ -109,7 +109,7 @@ ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_commit_impl(addr, offset, length); @@ -121,7 +121,7 @@ ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_decommit_impl(addr, offset, length); @@ -134,7 +134,7 @@ ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(addr != NULL); @@ -152,7 +152,7 @@ ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { (uintptr_t)offset), length); } -bool +static bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(addr != NULL); @@ -177,7 +177,7 @@ ehooks_default_split_impl() { return false; } -bool +static bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { return ehooks_default_split_impl(); @@ -195,7 +195,7 @@ ehooks_default_merge_impl(void *addr_a, void *addr_b) { return false; } -bool +static bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { if (!maps_coalesce) { -- cgit v0.12 From e08c581cf1ae5fe8a6735f7b92b7780527125287 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 17:43:53 -0800 Subject: Extent: Get rid of extent-specific pre/post reentrancy calls. These are taken care of by the ehook module; the extra increments and decrements are safe but unnecessary. --- src/extent.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/src/extent.c b/src/extent.c index e7e4712..ea7b8f2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -831,29 +831,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return extent; } -static void -extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { - tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); - if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { - /* - * The only legitimate case of customized extent hooks for a0 is - * hooks with no allocation activities. One such example is to - * place metadata on pre-allocated resources such as huge pages. - * In that case, rely on reentrancy_level checks to catch - * infinite recursions. - */ - pre_reentrancy(tsd, NULL); - } else { - pre_reentrancy(tsd, arena); - } -} - -static void -extent_hook_post_reentrancy(tsdn_t *tsdn) { - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - post_reentrancy(tsd); -} - /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -1341,9 +1318,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_reregister(tsdn, extent); } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } /* Try to decommit; purge if that fails. */ bool zeroed; if (!extent_committed_get(extent)) { @@ -1363,9 +1337,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } else { zeroed = false; } - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_zeroed_set(extent, zeroed); if (config_prof) { -- cgit v0.12 From 92a511d385d1a256a42c6bf8cfc3dd9adb1f5217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 18:31:47 -0800 Subject: Make extent module hermetic. In the form of extent2.h. The naming leaves something to be desired, but I'll leave that for a later diff. --- Makefile.in | 1 + include/jemalloc/internal/bin.h | 2 - include/jemalloc/internal/extent.h | 3 + include/jemalloc/internal/extent2.h | 92 ++ include/jemalloc/internal/extent_externs.h | 59 - include/jemalloc/internal/extent_inlines.h | 38 - include/jemalloc/internal/extent_structs.h | 33 - include/jemalloc/internal/extent_types.h | 13 - .../jemalloc/internal/jemalloc_internal_includes.h | 3 - .../internal/jemalloc_internal_inlines_b.h | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/extent.c | 1718 +------------------ src/extent2.c | 1738 ++++++++++++++++++++ 14 files changed, 1838 insertions(+), 1865 deletions(-) create mode 100644 include/jemalloc/internal/extent2.h delete mode 100644 include/jemalloc/internal/extent_externs.h delete mode 100644 include/jemalloc/internal/extent_inlines.h delete mode 100644 include/jemalloc/internal/extent_structs.h delete mode 100644 include/jemalloc/internal/extent_types.h create mode 100644 src/extent2.c diff --git a/Makefile.in b/Makefile.in index a735e0e..29977bc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -107,6 +107,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ + $(srcroot)src/extent2.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 0d6aff8..92e8122 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,8 +4,6 @@ #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/extent_types.h" -#include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 
fa7d126..2fd6e90 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -620,4 +620,7 @@ extent_esnead_comp(const extent_t *a, const extent_t *b) { return ret; } +ph_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_heap_, extent_heap_t, extent_t) + #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h new file mode 100644 index 0000000..22035bb --- /dev/null +++ b/include/jemalloc/internal/extent2.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT2_H +#define JEMALLOC_INTERNAL_EXTENT2_H + +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/eset.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" + +/* + * This module contains the page-level allocator. It chooses the addresses that + * allocations requested by other modules will inhabit, and updates the global + * metadata to reflect allocation/deallocation/purging decisions. + * + * The naming ("extent2" for the module, and "extent_" or "extents_" for most of + * the functions) is historical. Eventually, the naming should be updated to + * reflect the functionality. Similarly, the utilization stats live here for no + * particular reason. This will also be changed, but much more immediately. + */ + +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. + */ +typedef struct extent_util_stats_s extent_util_stats_t; +struct extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; +struct extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. 
+ */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; + +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, size_t npages_min); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b); +bool extent_head_no_merge(extent_t *a, extent_t *b); + +bool extent_boot(void); + +void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + +#endif /* JEMALLOC_INTERNAL_EXTENT2_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h deleted file mode 100644 index 4e3803c..0000000 --- a/include/jemalloc/internal/extent_externs.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H -#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H - -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" - -extern size_t opt_lg_extent_max_active_fit; - -extern rtree_t extents_rtree; -extern mutex_pool_t extent_mutex_pool; - -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); - -ph_proto(, extent_avail_, extent_tree_t, extent_t) -ph_proto(, extent_heap_, extent_heap_t, extent_t) - -extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); -void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent); -extent_t 
*extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, size_t npages_min); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); -void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b); -bool extent_head_no_merge(extent_t *a, extent_t *b); - -bool extent_boot(void); - -void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size); -void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); - -#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h deleted file mode 100644 index 2647df8..0000000 --- a/include/jemalloc/internal/extent_inlines.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H -#define JEMALLOC_INTERNAL_EXTENT_INLINES_H - -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/sz.h" - -static inline void -extent_lock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); -} - -static inline void -extent_unlock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); -} - -static inline void -extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); -} - -static inline void -extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); -} - -#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h deleted file mode 100644 index 4e6e085..0000000 --- a/include/jemalloc/internal/extent_structs.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H -#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H - -#include 
"jemalloc/internal/atomic.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/slab_data.h" - -/* - * The following two structs are for experimental purposes. See - * experimental_utilization_query_ctl and - * experimental_utilization_batch_query_ctl in src/ctl.c. - */ - -struct extent_util_stats_s { - size_t nfree; - size_t nregs; - size_t size; -}; - -struct extent_util_stats_verbose_s { - void *slabcur_addr; - size_t nfree; - size_t nregs; - size_t size; - size_t bin_nfree; - size_t bin_nregs; -}; - -#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h deleted file mode 100644 index 25b360e..0000000 --- a/include/jemalloc/internal/extent_types.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H -#define JEMALLOC_INTERNAL_EXTENT_TYPES_H - -typedef struct extent_util_stats_s extent_util_stats_t; -typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; - -/* - * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) - * is the max ratio between the size of the active extent and the new extent. - */ -#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 - -#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 6755b43..75a94d3 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tcache_types.h" @@ -61,7 +60,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/extent_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" @@ -81,7 +79,6 @@ */ #include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/extent_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/arena_inlines_b.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index f0b73d0..d4cb04c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/extent2.h" #include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. 
*/ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index e680312..4118b91 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -47,6 +47,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ce51930..ed3b524 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -47,6 +47,7 @@ + diff --git a/src/extent.c b/src/extent.c index ea7b8f2..1a5a1fa 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,1722 +1,6 @@ -#define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" - -/******************************************************************************/ -/* Data. */ - -rtree_t extents_rtree; -/* Keyed by the address of the extent_t being protected. */ -mutex_pool_t extent_mutex_pool; - -size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; - -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, - bool growing_retained); -static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, - bool growing_retained); -static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b, bool growing_retained); - -/* Used exclusively for gdump triggering. */ -static atomic_zu_t curpages; -static atomic_zu_t highpages; - -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. 
- */ - -static void extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, - bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent, bool growing_retained); - -/******************************************************************************/ - -#define ATTR_NONE /* does nothing */ - -ph_gen(ATTR_NONE, extent_avail_, extent_tree_t, extent_t, ph_link, +ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) - -#undef ATTR_NONE - -typedef enum { - lock_result_success, - lock_result_failure, - lock_result_no_extent -} lock_result_t; - -static lock_result_t -extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result, bool inactive_only) { - extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, - elm, true); - - /* Slab implies active extents and should be skipped. */ - if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &extents_rtree, elm, true))) { - return lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->extent mapping. We have to recheck while holding the lock. - */ - extent_lock(tsdn, extent1); - extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, elm, true); - - if (extent1 == extent2) { - *result = extent1; - return lock_result_success; - } else { - extent_unlock(tsdn, extent1); - return lock_result_failure; - } -} - -/* - * Returns a pool-locked extent_t * if there's one associated with the given - * address, and NULL otherwise. 
- */ -static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, - bool inactive_only) { - extent_t *ret = NULL; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - lock_result_t lock_result; - do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, - inactive_only); - } while (lock_result == lock_result_failure); - return ret; -} - -static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); - } -} - -extent_t * -extent_alloc(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_t *extent = extent_avail_first(&arena->extent_avail); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return base_alloc_extent(tsdn, arena->base); - } - extent_avail_remove(&arena->extent_avail, extent); - atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return extent; -} - -void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_avail_insert(&arena->extent_avail, extent); - atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); -} - ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) - -static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { - extent_state_set(extent, extent_state_active); - bool coalesced; - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, - extent, &coalesced, false); - extent_state_set(extent, eset_state_get(eset)); - - if (!coalesced) { - return true; - } - eset_insert_locked(tsdn, eset, extent); - return false; -} - -extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size + pad != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; -} - -void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - assert(extent_dumpable_get(extent)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, 
false); - - extent_record(tsdn, arena, ehooks, eset, extent, false); -} - -extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - size_t npages_min) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - malloc_mutex_lock(tsdn, &eset->mtx); - - /* - * Get the LRU coalesced extent, if any. If coalescing was delayed, - * the loop will iterate until the LRU extent is fully coalesced. - */ - extent_t *extent; - while (true) { - /* Get the LRU extent, if any. */ - extent = extent_list_first(&eset->lru); - if (extent == NULL) { - goto label_return; - } - /* Check the eviction limit. */ - size_t extents_npages = atomic_load_zu(&eset->npages, - ATOMIC_RELAXED); - if (extents_npages <= npages_min) { - extent = NULL; - goto label_return; - } - eset_remove_locked(tsdn, eset, extent); - if (!eset->delay_coalesce) { - break; - } - /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent)) { - break; - } - /* - * The LRU extent was just coalesced and the result placed in - * the LRU at its neighbor's position. Start over. - */ - } - - /* - * Either mark the extent active or deregister it to protect against - * concurrent operations. - */ - switch (eset_state_get(eset)) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - case extent_state_muzzy: - extent_state_set(extent, extent_state_active); - break; - case extent_state_retained: - extent_deregister(tsdn, extent); - break; - default: - not_reached(); - } - -label_return: - malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; -} - -/* - * This can only happen when we fail to allocate a new extent struct (which - * indicates OOM), e.g. when trying to split an existing extent. - */ -static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - size_t sz = extent_size_get(extent); - if (config_stats) { - arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); - } - /* - * Leak extent after making sure its pages have already been purged, so - * that this is only a virtual memory leak. 
- */ - if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, - growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained); - } - } - extent_dalloc(tsdn, arena, extent); -} - -static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extent_state_active); - - extent_state_set(extent, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, extent); -} - -static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &eset->mtx); -} - -static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == eset_state_get(eset)); - - eset_remove_locked(tsdn, eset, extent); - extent_state_set(extent, extent_state_active); -} - -static bool -extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent, bool dependent, bool init_missing, - rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) { - return true; - } - assert(*r_elm_a != NULL); - - *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_last_get(extent), dependent, init_missing); - if (!dependent && *r_elm_b == NULL) { - return true; - } - assert(*r_elm_b != NULL); - - return false; -} - -static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); - if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, - slab); - } -} - -static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, - szind_t szind) { - assert(extent_slab_get(extent)); - - /* Register interior. */ - for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent, szind, true); - } -} - -static void -extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { - cassert(config_prof); - /* prof_gdump() requirement. */ - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nadd = extent_size_get(extent) >> LG_PAGE; - size_t cur = atomic_fetch_add_zu(&curpages, nadd, - ATOMIC_RELAXED) + nadd; - size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); - while (cur > high && !atomic_compare_exchange_weak_zu( - &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { - /* - * Don't refresh cur, because it may have decreased - * since this thread lost the highpages update race. - * Note that high is updated in case of CAS failure. 
- */ - } - if (cur > high && prof_gdump_get_unlocked()) { - prof_gdump(tsdn); - } - } -} - -static void -extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { - cassert(config_prof); - - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nsub = extent_size_get(extent) >> LG_PAGE; - assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); - atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); - } -} - -static bool -extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - - /* - * We need to hold the lock to protect against a concurrent coalesce - * operation that sees us in a partial state. - */ - extent_lock(tsdn, extent); - - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, - &elm_a, &elm_b)) { - extent_unlock(tsdn, extent); - return true; - } - - szind_t szind = extent_szind_get_maybe_invalid(extent); - bool slab = extent_slab_get(extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); - if (slab) { - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - - extent_unlock(tsdn, extent); - - if (config_prof && gdump_add) { - extent_gdump_add(tsdn, extent); - } - - return false; -} - -static bool -extent_register(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, true); -} - -static bool -extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, false); -} - -static void -extent_reregister(tsdn_t *tsdn, extent_t *extent) { - bool err = extent_register(tsdn, extent); - assert(!err); -} - -/* - * Removes all pointers to the given extent from the global rtree indices for - * its interior. This is relevant for slab extents, for which we need to do - * metadata lookups at places other than the head of the extent. We deregister - * on the interior, then, when an extent moves from being an active slab to an - * inactive state. - */ -static void -extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - extent_t *extent) { - size_t i; - - assert(extent_slab_get(extent)); - - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE)); - } -} - -/* - * Removes all pointers to the given extent from the global rtree. - */ -static void -extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, - &elm_a, &elm_b); - - extent_lock(tsdn, extent); - - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); - } - - extent_unlock(tsdn, extent); - - if (config_prof && gdump) { - extent_gdump_sub(tsdn, extent); - } -} - -static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, true); -} - -static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, false); -} - -/* - * Tries to find and remove an extent from eset that can be used for the - * given allocation request. 
- */ -static extent_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(alignment > 0); - if (config_debug && new_addr != NULL) { - /* - * Non-NULL new_addr has two use cases: - * - * 1) Recycle a known-extant extent, e.g. during purging. - * 2) Perform in-place expanding reallocation. - * - * Regardless of use case, new_addr must either refer to a - * non-existing extent, or to the base of an extant extent, - * since only active slabs support interior lookups (which of - * course cannot be recycled). - */ - assert(PAGE_ADDR2BASE(new_addr) == new_addr); - assert(pad == 0); - assert(alignment <= PAGE); - } - - size_t esize = size + pad; - malloc_mutex_lock(tsdn, &eset->mtx); - extent_t *extent; - if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, - false); - if (extent != NULL) { - /* - * We might null-out extent to report an error, but we - * still need to unlock the associated mutex after. - */ - extent_t *unlock_extent = extent; - assert(extent_base_get(extent) == new_addr); - if (extent_arena_ind_get(extent) - != arena_ind_get(arena) || - extent_size_get(extent) < esize || - extent_state_get(extent) != - eset_state_get(eset)) { - extent = NULL; - } - extent_unlock(tsdn, unlock_extent); - } - } else { - extent = eset_fit_locked(tsdn, eset, esize, alignment); - } - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &eset->mtx); - return NULL; - } - - extent_activate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &eset->mtx); - - return extent; -} - -/* - * Given an allocation request and an extent guaranteed to be able to satisfy - * it, this splits off lead and trail extents, leaving extent pointing to an - * extent satisfying the allocation. - * This function doesn't put lead or trail into any eset_t; it's the caller's - * job to ensure that they can be reused. - */ -typedef enum { - /* - * Split successfully. lead, extent, and trail, are modified to extents - * describing the ranges before, in, and after the given allocation. - */ - extent_split_interior_ok, - /* - * The extent can't satisfy the given allocation request. None of the - * input extent_t *s are touched. - */ - extent_split_interior_cant_alloc, - /* - * In a potentially invalid state. Must leak (if *to_leak is non-NULL), - * and salvage what's still salvageable (if *to_salvage is non-NULL). - * None of lead, extent, or trail are valid. - */ - extent_split_interior_error -} extent_split_interior_result_t; - -static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, - /* The result of splitting, in case of success. */ - extent_t **extent, extent_t **lead, extent_t **trail, - /* The mess to clean up, in case of error. 
*/ - extent_t **to_leak, extent_t **to_salvage, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool growing_retained) { - size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); - assert(new_addr == NULL || leadsize == 0); - if (extent_size_get(*extent) < leadsize + esize) { - return extent_split_interior_cant_alloc; - } - size_t trailsize = extent_size_get(*extent) - leadsize - esize; - - *lead = NULL; - *trail = NULL; - *to_leak = NULL; - *to_salvage = NULL; - - /* Split the lead. */ - if (leadsize != 0) { - *lead = *extent; - *extent = extent_split_impl(tsdn, arena, ehooks, *lead, - leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, - growing_retained); - if (*extent == NULL) { - *to_leak = *lead; - *lead = NULL; - return extent_split_interior_error; - } - } - - /* Split the trail. */ - if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); - if (*trail == NULL) { - *to_leak = *extent; - *to_salvage = *lead; - *lead = NULL; - *extent = NULL; - return extent_split_interior_error; - } - } - - if (leadsize == 0 && trailsize == 0) { - /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. - */ - extent_szind_set(*extent, szind); - if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(*extent), szind, slab); - if (slab && extent_size_get(*extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(*extent) - - (uintptr_t)PAGE, szind, slab); - } - } - } - - return extent_split_interior_ok; -} - -/* - * This fulfills the indicated allocation request out of the given extent (which - * the caller should have ensured was big enough). If there's any unused space - * before or after the resulting allocation, that space is given its own extent - * and put back into eset. - */ -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, - bool growing_retained) { - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; - - extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, - &to_salvage, new_addr, size, pad, alignment, slab, szind, - growing_retained); - - if (!maps_coalesce && result != extent_split_interior_ok - && !opt_retain) { - /* - * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the eset. - */ - assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, eset, to_leak); - return NULL; - } - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_deactivate(tsdn, arena, eset, lead); - } - if (trail != NULL) { - extent_deactivate(tsdn, arena, eset, trail); - } - return extent; - } else { - /* - * We should have picked an extent that was large enough to - * fulfill our allocation request. 
- */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - extent_deregister(tsdn, to_salvage); - } - if (to_leak != NULL) { - void *leak = extent_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, - growing_retained); - assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, - false) == NULL); - } - return NULL; - } - unreachable(); -} - -static bool -extent_need_manual_zero(arena_t *arena) { - /* - * Need to manually zero the extent on repopulating if either; 1) non - * default extent hooks installed (in which case the purge semantics may - * change); or 2) transparent huge pages enabled. - */ - return (!ehooks_are_default(arena_get_ehooks(arena)) || - (opt_thp == thp_mode_always)); -} - -/* - * Tries to satisfy the given allocation request by reusing one of the extents - * in the given eset_t. - */ -static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, eset, new_addr, size, pad, alignment, slab, - growing_retained); - if (extent == NULL) { - return NULL; - } - - extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, - new_addr, size, pad, alignment, slab, szind, extent, - growing_retained); - if (extent == NULL) { - return NULL; - } - - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, extent, - growing_retained); - return NULL; - } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } - } - - if (extent_committed_get(extent)) { - *commit = true; - } - if (extent_zeroed_get(extent)) { - *zero = true; - } - - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - assert(extent_state_get(extent) == extent_state_active); - if (slab) { - extent_slab_set(extent, slab); - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - - if (*zero) { - void *addr = extent_base_get(extent); - if (!extent_zeroed_get(extent)) { - size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } else if (config_debug) { - size_t *p = (size_t *)(uintptr_t)addr; - /* Check the first page only. */ - for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { - assert(p[i] == 0); - } - } - } - return extent; -} - -/* - * If virtual memory is retained, create increasingly larger extents from which - * to split requested extents in order to limit the total number of disjoint - * virtual memory ranges retained by each arena. 
- */ -static extent_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - size_t esize = size + pad; - size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size_min < esize) { - goto label_err; - } - /* - * Find the next extent size in the series that would be large enough to - * satisfy this request. - */ - pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); - while (alloc_size < alloc_size_min) { - egn_skip++; - if (arena->extent_grow_next + egn_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. */ - goto label_err; - } - alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); - } - - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { - goto label_err; - } - bool zeroed = false; - bool committed = false; - - void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); - - extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); - if (ptr == NULL) { - extent_dalloc(tsdn, arena, extent); - goto label_err; - } - - if (extent_register_no_gdump_add(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - goto label_err; - } - - if (extent_zeroed_get(extent) && extent_committed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; - extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, - &to_salvage, NULL, size, pad, alignment, slab, szind, true); - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, lead, true); - } - if (trail != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, trail, true); - } - } else { - /* - * We should have allocated a sufficiently large extent; the - * cant_alloc case should not occur. - */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - if (config_prof) { - extent_gdump_add(tsdn, to_salvage); - } - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, to_salvage, true); - } - if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, - &arena->eset_retained, to_leak, true); - } - goto label_err; - } - - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), true)) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, extent, true); - goto label_err; - } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } - } - - /* - * Increment extent_grow_next if doing so wouldn't exceed the allowed - * range. - */ - if (arena->extent_grow_next + egn_skip + 1 <= - arena->retain_grow_limit) { - arena->extent_grow_next += egn_skip + 1; - } else { - arena->extent_grow_next = arena->retain_grow_limit; - } - /* All opportunities for failure are past. 
*/ - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - - if (config_prof) { - /* Adjust gdump stats now that extent is final size. */ - extent_gdump_add(tsdn, extent); - } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - if (slab) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_slab_set(extent, true); - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - if (*zero && !extent_zeroed_get(extent)) { - void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } - - return extent; -label_err: - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - return NULL; -} - -static extent_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size != 0); - assert(alignment != 0); - - malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - - extent_t *extent = extent_recycle(tsdn, arena, ehooks, - &arena->eset_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit, true); - if (extent != NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - if (config_prof) { - extent_gdump_add(tsdn, extent); - } - } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, - alignment, slab, szind, zero, commit); - /* extent_grow_retained() always releases extent_grow_mtx. */ - } else { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - } - malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); - - return extent; -} - -static extent_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - size_t esize = size + pad; - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { - return NULL; - } - size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, - zero, commit, arena_ind_get(arena)); - if (addr == NULL) { - extent_dalloc(tsdn, arena, extent); - return NULL; - } - extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true, EXTENT_NOT_HEAD); - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - return NULL; - } - - return extent; -} - -extent_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); - if (extent == NULL) { - if (opt_retain && new_addr != NULL) { - /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper_hard which does mmap - * that is very unlikely to succeed (unless it happens - * to be at the end). 
- */ - return NULL; - } - extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } - - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; -} - -static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, - const extent_t *outer) { - assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); - if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { - return false; - } - - assert(extent_state_get(inner) == extent_state_active); - if (extent_state_get(outer) != eset->state) { - return false; - } - - if (extent_committed_get(inner) != extent_committed_get(outer)) { - return false; - } - - return true; -} - -static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, eset, inner, outer)); - - extent_activate_locked(tsdn, arena, eset, outer); - - malloc_mutex_unlock(tsdn, &eset->mtx); - bool err = extent_merge_impl(tsdn, arena, ehooks, - forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &eset->mtx); - - if (err) { - extent_deactivate_locked(tsdn, arena, eset, outer); - } - - return err; -} - -static extent_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained, bool inactive_only) { - /* - * We avoid checking / locking inactive neighbors for large size - * classes, since they are eagerly coalesced on deallocation which can - * cause lock contention. - */ - /* - * Continue attempting to coalesce until failure, to protect against - * races with other threads that are thwarted by this one. - */ - bool again; - do { - again = false; - - /* Try to coalesce forward. */ - extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent), inactive_only); - if (next != NULL) { - /* - * eset->mtx only protects against races for - * like-state eset, so call extent_can_coalesce() - * before releasing next's pool lock. - */ - bool can_coalesce = extent_can_coalesce(arena, eset, - extent, next); - - extent_unlock(tsdn, next); - - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, next, true, - growing_retained)) { - if (eset->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return extent; - } - again = true; - } - } - - /* Try to coalesce backward. */ - extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent), inactive_only); - if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, eset, - extent, prev); - extent_unlock(tsdn, prev); - - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, prev, false, - growing_retained)) { - extent = prev; - if (eset->delay_coalesce) { - /* Do minimal coalescing. 
*/ - *coalesced = true; - return extent; - } - again = true; - } - } - } while (again); - - if (eset->delay_coalesce) { - *coalesced = false; - } - return extent; -} - -static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, false); -} - -static extent_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, true); -} - -/* - * Does the metadata management portions of putting an unused extent into the - * given eset_t (coalesces, deregisters slab interiors, the heap operations). - */ -static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - assert((eset_state_get(eset) != extent_state_dirty && - eset_state_get(eset) != extent_state_muzzy) || - !extent_zeroed_get(extent)); - - malloc_mutex_lock(tsdn, &eset->mtx); - - extent_szind_set(extent, SC_NSIZES); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); - } - - assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), true) == extent); - - if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->eset_dirty); - /* Always coalesce large eset eagerly. */ - bool coalesced; - do { - assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, extent, &coalesced, - growing_retained); - } while (coalesced); - if (extent_size_get(extent) >= oversize_threshold) { - /* Shortcut to purge the oversize extent eagerly. */ - malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, ehooks, extent); - return; - } - } - extent_deactivate_locked(tsdn, arena, eset, extent); - - malloc_mutex_unlock(tsdn, &eset->mtx); -} - -void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - ehooks_t *ehooks = arena_get_ehooks(arena); - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - return; - } - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); -} - -static bool -extent_may_dalloc(void) { - /* With retain enabled, the default dalloc always fails. */ - return !opt_retain; -} - -static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - bool err; - - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_addr_set(extent, extent_base_get(extent)); - - /* Try to deallocate. 
*/ - err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - - if (!err) { - extent_dalloc(tsdn, arena, extent); - } - - return err; -} - -void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_dumpable_get(extent)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Avoid calling the default extent_dalloc unless have to. */ - if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { - /* - * Deregister first to avoid a race with other allocating - * threads, and reregister if deallocation fails. - */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { - return; - } - extent_reregister(tsdn, extent); - } - - /* Try to decommit; purge if that fails. */ - bool zeroed; - if (!extent_committed_get(extent)) { - zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent))) { - zeroed = true; - } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena_ind_get(arena))) { - zeroed = true; - } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena_ind_get(arena))) { - zeroed = false; - } else { - zeroed = false; - } - extent_zeroed_set(extent, zeroed); - - if (config_prof) { - extent_gdump_sub(tsdn, extent); - } - - extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, - false); -} - -void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, extent); - - extent_addr_set(extent, extent_base_get(extent)); - - /* Try to destroy; silently fail otherwise. */ - ehooks_destroy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - - extent_dalloc(tsdn, arena, extent); -} - -static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) || !err); - return err; -} - -bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, - size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, - false); -} - -bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) && err); - return err; -} - -static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - return err; -} - -bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, - length, false); -} - -static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - return err; -} - -bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, extent, - offset, length, false); -} - -/* - * Accepts the extent to split, and the characteristics of each side of the - * split. The 'a' parameters go with the 'lead' of the resulting pair of - * extents (the lower addressed portion of the split), and the 'b' parameters go - * with the trail (the higher addressed portion). This makes 'extent' the lead, - * and returns the trail (except in case of error). - */ -static extent_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { - assert(extent_size_get(extent) == size_a + size_b); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - - if (ehooks_split_will_fail(ehooks)) { - return NULL; - } - - extent_t *trail = extent_alloc(tsdn, arena); - if (trail == NULL) { - goto label_error_a; - } - - extent_init(trail, arena_ind_get(arena), - (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, - slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_dumpable_get(extent), EXTENT_NOT_HEAD); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; - { - extent_t lead; - - extent_init(&lead, arena_ind_get(arena), - extent_addr_get(extent), size_a, - slab_a, szind_a, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), - EXTENT_NOT_HEAD); - - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, - true, &lead_elm_a, &lead_elm_b); - } - rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b); - - if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL - || trail_elm_b == NULL) { - goto label_error_b; - } - - extent_lock2(tsdn, extent, trail); - - bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), - size_a + size_b, size_a, size_b, extent_committed_get(extent), - arena_ind_get(arena)); - - if (err) { - goto label_error_c; - } - - extent_size_set(extent, size_a); - extent_szind_set(extent, szind_a); - - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, - szind_a, slab_a); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, - szind_b, slab_b); - - extent_unlock2(tsdn, extent, trail); - - return trail; -label_error_c: - extent_unlock2(tsdn, extent, trail); -label_error_b: - extent_dalloc(tsdn, arena, trail); -label_error_a: - return NULL; -} - -extent_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); -} - -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -bool -extent_head_no_merge(extent_t *a, extent_t *b) { - assert(extent_base_get(a) < extent_base_get(b)); - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). - */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } - /* If b is a head extent, disallow the cross-region merge. */ - if (extent_is_head_get(b)) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. - */ - assert(extent_sn_comp(a, b) != 0); - return true; - } - assert(extent_sn_comp(a, b) == 0); - - return false; -} - -static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, - extent_t *b, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - assert(extent_base_get(a) < extent_base_get(b)); - - if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { - return true; - } - - bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); - - if (err) { - return true; - } - - /* - * The rtree writes must happen while all the relevant elements are - * owned, so the following code uses decomposed helper functions rather - * than extent_{,de}register() to do things in the right order. - */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, - &a_elm_b); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, - &b_elm_b); - - extent_lock2(tsdn, a, b); - - if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, - SC_NSIZES, false); - } - if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, - SC_NSIZES, false); - } else { - b_elm_b = b_elm_a; - } - - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, SC_NSIZES); - extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? - extent_sn_get(a) : extent_sn_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, - false); - - extent_unlock2(tsdn, a, b); - - /* - * If we got here, we merged the extents; so they must be from the same - * arena (i.e. this one). - */ - assert(extent_arena_ind_get(b) == arena_ind_get(arena)); - extent_dalloc(tsdn, arena, b); - - return false; -} - -bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, ehooks, a, b, false); -} - -bool -extent_boot(void) { - if (rtree_new(&extents_rtree, true)) { - return true; - } - - if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", - WITNESS_RANK_EXTENT_POOL)) { - return true; - } - - if (have_dss) { - extent_dss_boot(); - } - - return false; -} - -void -extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = 0; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = 0; - *nregs = 1; - } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - } -} - -void -extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL - && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; - *slabcur_addr = NULL; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = *bin_nfree = *bin_nregs = 0; - *nregs = 1; - *slabcur_addr = NULL; - return; - } - - *nfree = extent_nfree_get(extent); - 
const szind_t szind = extent_szind_get(extent); - *nregs = bin_infos[szind].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - - const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); - assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; - - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - *bin_nregs = *nregs * bin->stats.curslabs; - assert(*bin_nregs >= bin->stats.curregs); - *bin_nfree = *bin_nregs - bin->stats.curregs; - } else { - *bin_nfree = *bin_nregs = 0; - } - extent_t *slab; - if (bin->slabcur != NULL) { - slab = bin->slabcur; - } else { - slab = extent_heap_first(&bin->slabs_nonfull); - } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; - malloc_mutex_unlock(tsdn, &bin->lock); -} diff --git a/src/extent2.c b/src/extent2.c new file mode 100644 index 0000000..4865beb --- /dev/null +++ b/src/extent2.c @@ -0,0 +1,1738 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" + +/******************************************************************************/ +/* Data. */ + +rtree_t extents_rtree; +/* Keyed by the address of the extent_t being protected. */ +mutex_pool_t extent_mutex_pool; + +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + +static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained); +static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); +static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); +static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b, bool growing_retained); + +/* Used exclusively for gdump triggering. */ +static atomic_zu_t curpages; +static atomic_zu_t highpages; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ + +static void extent_deregister(tsdn_t *tsdn, extent_t *extent); +static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); +static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, + bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent, bool growing_retained); + +/******************************************************************************/ + +typedef enum { + lock_result_success, + lock_result_failure, + lock_result_no_extent +} lock_result_t; + +static inline void +extent_lock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_unlock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline void +extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static lock_result_t +extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, + extent_t **result, bool inactive_only) { + extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, + elm, true); + + /* Slab implies active extents and should be skipped. */ + if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &extents_rtree, elm, true))) { + return lock_result_no_extent; + } + + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->extent mapping. We have to recheck while holding the lock. + */ + extent_lock(tsdn, extent1); + extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, elm, true); + + if (extent1 == extent2) { + *result = extent1; + return lock_result_success; + } else { + extent_unlock(tsdn, extent1); + return lock_result_failure; + } +} + +/* + * Returns a pool-locked extent_t * if there's one associated with the given + * address, and NULL otherwise. 
+ */ +static extent_t * +extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, + bool inactive_only) { + extent_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + lock_result_t lock_result; + do { + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, + inactive_only); + } while (lock_result == lock_result_failure); + return ret; +} + +static void +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + +extent_t * +extent_alloc(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_t *extent = extent_avail_first(&arena->extent_avail); + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); + return base_alloc_extent(tsdn, arena->base); + } + extent_avail_remove(&arena->extent_avail, extent); + atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); + return extent; +} + +void +extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_avail_insert(&arena->extent_avail, extent); + atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); +} + +static bool +extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { + extent_state_set(extent, extent_state_active); + bool coalesced; + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + extent, &coalesced, false); + extent_state_set(extent, eset_state_get(eset)); + + if (!coalesced) { + return true; + } + eset_insert_locked(tsdn, eset, extent); + return false; +} + +extent_t * +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; +} + +void +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_addr_set(extent, extent_base_get(extent)); + extent_zeroed_set(extent, false); + + extent_record(tsdn, arena, ehooks, eset, extent, false); +} + 
+extent_t * +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + size_t npages_min) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + malloc_mutex_lock(tsdn, &eset->mtx); + + /* + * Get the LRU coalesced extent, if any. If coalescing was delayed, + * the loop will iterate until the LRU extent is fully coalesced. + */ + extent_t *extent; + while (true) { + /* Get the LRU extent, if any. */ + extent = extent_list_first(&eset->lru); + if (extent == NULL) { + goto label_return; + } + /* Check the eviction limit. */ + size_t extents_npages = atomic_load_zu(&eset->npages, + ATOMIC_RELAXED); + if (extents_npages <= npages_min) { + extent = NULL; + goto label_return; + } + eset_remove_locked(tsdn, eset, extent); + if (!eset->delay_coalesce) { + break; + } + /* Try to coalesce. */ + if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent)) { + break; + } + /* + * The LRU extent was just coalesced and the result placed in + * the LRU at its neighbor's position. Start over. + */ + } + + /* + * Either mark the extent active or deregister it to protect against + * concurrent operations. + */ + switch (eset_state_get(eset)) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + case extent_state_muzzy: + extent_state_set(extent, extent_state_active); + break; + case extent_state_retained: + extent_deregister(tsdn, extent); + break; + default: + not_reached(); + } + +label_return: + malloc_mutex_unlock(tsdn, &eset->mtx); + return extent; +} + +/* + * This can only happen when we fail to allocate a new extent struct (which + * indicates OOM), e.g. when trying to split an existing extent. + */ +static void +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { + size_t sz = extent_size_get(extent); + if (config_stats) { + arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + } + /* + * Leak extent after making sure its pages have already been purged, so + * that this is only a virtual memory leak. 
+ */ + if (eset_state_get(eset) == extent_state_dirty) { + if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + growing_retained)) { + extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained); + } + } + extent_dalloc(tsdn, arena, extent); +} + +static void +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); + assert(extent_state_get(extent) == extent_state_active); + + extent_state_set(extent, eset_state_get(eset)); + eset_insert_locked(tsdn, eset, extent); +} + +static void +extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + malloc_mutex_lock(tsdn, &eset->mtx); + extent_deactivate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); +} + +static void +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); + assert(extent_state_get(extent) == eset_state_get(eset)); + + eset_remove_locked(tsdn, eset, extent); + extent_state_set(extent, extent_state_active); +} + +static bool +extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const extent_t *extent, bool dependent, bool init_missing, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) { + return true; + } + assert(*r_elm_a != NULL); + + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_last_get(extent), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; + } + assert(*r_elm_b != NULL); + + return false; +} + +static void +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); + if (elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, + slab); + } +} + +static void +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, + szind_t szind) { + assert(extent_slab_get(extent)); + + /* Register interior. */ + for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE), extent, szind, true); + } +} + +static void +extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { + cassert(config_prof); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (opt_prof && extent_state_get(extent) == extent_state_active) { + size_t nadd = extent_size_get(extent) >> LG_PAGE; + size_t cur = atomic_fetch_add_zu(&curpages, nadd, + ATOMIC_RELAXED) + nadd; + size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); + while (cur > high && !atomic_compare_exchange_weak_zu( + &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highpages update race. + * Note that high is updated in case of CAS failure. 
+ */ + } + if (cur > high && prof_gdump_get_unlocked()) { + prof_gdump(tsdn); + } + } +} + +static void +extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { + cassert(config_prof); + + if (opt_prof && extent_state_get(extent) == extent_state_active) { + size_t nsub = extent_size_get(extent) >> LG_PAGE; + assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); + atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); + } +} + +static bool +extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + + /* + * We need to hold the lock to protect against a concurrent coalesce + * operation that sees us in a partial state. + */ + extent_lock(tsdn, extent); + + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, + &elm_a, &elm_b)) { + extent_unlock(tsdn, extent); + return true; + } + + szind_t szind = extent_szind_get_maybe_invalid(extent); + bool slab = extent_slab_get(extent); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); + if (slab) { + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + + extent_unlock(tsdn, extent); + + if (config_prof && gdump_add) { + extent_gdump_add(tsdn, extent); + } + + return false; +} + +static bool +extent_register(tsdn_t *tsdn, extent_t *extent) { + return extent_register_impl(tsdn, extent, true); +} + +static bool +extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { + return extent_register_impl(tsdn, extent, false); +} + +static void +extent_reregister(tsdn_t *tsdn, extent_t *extent) { + bool err = extent_register(tsdn, extent); + assert(!err); +} + +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ +static void +extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + extent_t *extent) { + size_t i; + + assert(extent_slab_get(extent)); + + for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE)); + } +} + +/* + * Removes all pointers to the given extent from the global rtree. + */ +static void +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, + &elm_a, &elm_b); + + extent_lock(tsdn, extent); + + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } + + extent_unlock(tsdn, extent); + + if (config_prof && gdump) { + extent_gdump_sub(tsdn, extent); + } +} + +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + +/* + * Tries to find and remove an extent from eset that can be used for the + * given allocation request. 
+ */ +static extent_t * +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(alignment > 0); + if (config_debug && new_addr != NULL) { + /* + * Non-NULL new_addr has two use cases: + * + * 1) Recycle a known-extant extent, e.g. during purging. + * 2) Perform in-place expanding reallocation. + * + * Regardless of use case, new_addr must either refer to a + * non-existing extent, or to the base of an extant extent, + * since only active slabs support interior lookups (which of + * course cannot be recycled). + */ + assert(PAGE_ADDR2BASE(new_addr) == new_addr); + assert(pad == 0); + assert(alignment <= PAGE); + } + + size_t esize = size + pad; + malloc_mutex_lock(tsdn, &eset->mtx); + extent_t *extent; + if (new_addr != NULL) { + extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + false); + if (extent != NULL) { + /* + * We might null-out extent to report an error, but we + * still need to unlock the associated mutex after. + */ + extent_t *unlock_extent = extent; + assert(extent_base_get(extent) == new_addr); + if (extent_arena_ind_get(extent) + != arena_ind_get(arena) || + extent_size_get(extent) < esize || + extent_state_get(extent) != + eset_state_get(eset)) { + extent = NULL; + } + extent_unlock(tsdn, unlock_extent); + } + } else { + extent = eset_fit_locked(tsdn, eset, esize, alignment); + } + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &eset->mtx); + return NULL; + } + + extent_activate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); + + return extent; +} + +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any eset_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. 
*/ + extent_t **to_leak, extent_t **to_salvage, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool growing_retained) { + size_t esize = size + pad; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); + assert(new_addr == NULL || leadsize == 0); + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; + + /* Split the lead. */ + if (leadsize != 0) { + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, + growing_retained); + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; + } + } + + /* Split the trail. */ + if (trailsize != 0) { + *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; + } + } + + if (leadsize == 0 && trailsize == 0) { + /* + * Splitting causes szind to be set as a side effect, but no + * splitting occurred. + */ + extent_szind_set(*extent, szind); + if (szind != SC_NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)extent_past_get(*extent) - + (uintptr_t)PAGE, szind, slab); + } + } + } + + return extent_split_interior_ok; +} + +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into eset. + */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (!maps_coalesce && result != extent_split_interior_ok + && !opt_retain) { + /* + * Split isn't supported (implies Windows w/o retain). Avoid + * leaking the eset. + */ + assert(to_leak != NULL && lead == NULL && trail == NULL); + extent_deactivate(tsdn, arena, eset, to_leak); + return NULL; + } + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, eset, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, eset, trail); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. 
+ */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + false) == NULL); + } + return NULL; + } + unreachable(); +} + +static bool +extent_need_manual_zero(arena_t *arena) { + /* + * Need to manually zero the extent on repopulating if either; 1) non + * default extent hooks installed (in which case the purge semantics may + * change); or 2) transparent huge pages enabled. + */ + return (!ehooks_are_default(arena_get_ehooks(arena)) || + (opt_thp == thp_mode_always)); +} + +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given eset_t. + */ +static extent_t * +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, + rtree_ctx, eset, new_addr, size, pad, alignment, slab, + growing_retained); + if (extent == NULL) { + return NULL; + } + + extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, extent, + growing_retained); + if (extent == NULL) { + return NULL; + } + + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, extent, + growing_retained); + return NULL; + } + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } + } + + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + assert(extent_state_get(extent) == extent_state_active); + if (slab) { + extent_slab_set(extent, slab); + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + + if (*zero) { + void *addr = extent_base_get(extent); + if (!extent_zeroed_get(extent)) { + size_t size = extent_size_get(extent); + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { + memset(addr, 0, size); + } + } else if (config_debug) { + size_t *p = (size_t *)(uintptr_t)addr; + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } + } + } + return extent; +} + +/* + * If virtual memory is retained, create increasingly larger extents from which + * to split requested extents in order to limit the total number of disjoint + * virtual memory ranges retained by each arena. 
+ */ +static extent_t * +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { + malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + size_t esize = size + pad; + size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size_min < esize) { + goto label_err; + } + /* + * Find the next extent size in the series that would be large enough to + * satisfy this request. + */ + pszind_t egn_skip = 0; + size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + while (alloc_size < alloc_size_min) { + egn_skip++; + if (arena->extent_grow_next + egn_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. */ + goto label_err; + } + alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + } + + extent_t *extent = extent_alloc(tsdn, arena); + if (extent == NULL) { + goto label_err; + } + bool zeroed = false; + bool committed = false; + + void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); + + extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, + committed, true, EXTENT_IS_HEAD); + if (ptr == NULL) { + extent_dalloc(tsdn, arena, extent); + goto label_err; + } + + if (extent_register_no_gdump_add(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + goto label_err; + } + + if (extent_zeroed_get(extent) && extent_committed_get(extent)) { + *zero = true; + } + if (extent_committed_get(extent)) { + *commit = true; + } + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior(tsdn, + arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, NULL, size, pad, alignment, slab, szind, true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, lead, true); + } + if (trail != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, trail, true); + } + } else { + /* + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); + } + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, to_salvage, true); + } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, + &arena->eset_retained, to_leak, true); + } + goto label_err; + } + + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), true)) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, extent, true); + goto label_err; + } + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } + } + + /* + * Increment extent_grow_next if doing so wouldn't exceed the allowed + * range. + */ + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { + arena->extent_grow_next += egn_skip + 1; + } else { + arena->extent_grow_next = arena->retain_grow_limit; + } + /* All opportunities for failure are past. 
*/ + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + + if (config_prof) { + /* Adjust gdump stats now that extent is final size. */ + extent_gdump_add(tsdn, extent); + } + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + if (slab) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + extent_slab_set(extent, true); + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + if (*zero && !extent_zeroed_get(extent)) { + void *addr = extent_base_get(extent); + size_t size = extent_size_get(extent); + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { + memset(addr, 0, size); + } + } + + return extent; +label_err: + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + return NULL; +} + +static extent_t * +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size != 0); + assert(alignment != 0); + + malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); + + extent_t *extent = extent_recycle(tsdn, arena, ehooks, + &arena->eset_retained, new_addr, size, pad, alignment, slab, + szind, zero, commit, true); + if (extent != NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + if (config_prof) { + extent_gdump_add(tsdn, extent); + } + } else if (opt_retain && new_addr == NULL) { + extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + alignment, slab, szind, zero, commit); + /* extent_grow_retained() always releases extent_grow_mtx. */ + } else { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + } + malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); + + return extent; +} + +static extent_t * +extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + size_t esize = size + pad; + extent_t *extent = extent_alloc(tsdn, arena); + if (extent == NULL) { + return NULL; + } + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + zero, commit, arena_ind_get(arena)); + if (addr == NULL) { + extent_dalloc(tsdn, arena, extent); + return NULL; + } + extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true, EXTENT_NOT_HEAD); + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + if (extent_register(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + return NULL; + } + + return extent; +} + +extent_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); + if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). 
+ */ + return NULL; + } + extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, + new_addr, size, pad, alignment, slab, szind, zero, commit); + } + + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; +} + +static bool +extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, + const extent_t *outer) { + assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); + if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { + return false; + } + + assert(extent_state_get(inner) == extent_state_active); + if (extent_state_get(outer) != eset->state) { + return false; + } + + if (extent_committed_get(inner) != extent_committed_get(outer)) { + return false; + } + + return true; +} + +static bool +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { + assert(extent_can_coalesce(arena, eset, inner, outer)); + + extent_activate_locked(tsdn, arena, eset, outer); + + malloc_mutex_unlock(tsdn, &eset->mtx); + bool err = extent_merge_impl(tsdn, arena, ehooks, + forward ? inner : outer, forward ? outer : inner, growing_retained); + malloc_mutex_lock(tsdn, &eset->mtx); + + if (err) { + extent_deactivate_locked(tsdn, arena, eset, outer); + } + + return err; +} + +static extent_t * +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained, bool inactive_only) { + /* + * We avoid checking / locking inactive neighbors for large size + * classes, since they are eagerly coalesced on deallocation which can + * cause lock contention. + */ + /* + * Continue attempting to coalesce until failure, to protect against + * races with other threads that are thwarted by this one. + */ + bool again; + do { + again = false; + + /* Try to coalesce forward. */ + extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, + extent_past_get(extent), inactive_only); + if (next != NULL) { + /* + * eset->mtx only protects against races for + * like-state eset, so call extent_can_coalesce() + * before releasing next's pool lock. + */ + bool can_coalesce = extent_can_coalesce(arena, eset, + extent, next); + + extent_unlock(tsdn, next); + + if (can_coalesce && !extent_coalesce(tsdn, arena, + ehooks, eset, extent, next, true, + growing_retained)) { + if (eset->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return extent; + } + again = true; + } + } + + /* Try to coalesce backward. */ + extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, + extent_before_get(extent), inactive_only); + if (prev != NULL) { + bool can_coalesce = extent_can_coalesce(arena, eset, + extent, prev); + extent_unlock(tsdn, prev); + + if (can_coalesce && !extent_coalesce(tsdn, arena, + ehooks, eset, extent, prev, false, + growing_retained)) { + extent = prev; + if (eset->delay_coalesce) { + /* Do minimal coalescing. 
*/ + *coalesced = true; + return extent; + } + again = true; + } + } + } while (again); + + if (eset->delay_coalesce) { + *coalesced = false; + } + return extent; +} + +static extent_t * +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, false); +} + +static extent_t * +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, true); +} + +/* + * Does the metadata management portions of putting an unused extent into the + * given eset_t (coalesces, deregisters slab interiors, the heap operations). + */ +static void +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + assert((eset_state_get(eset) != extent_state_dirty && + eset_state_get(eset) != extent_state_muzzy) || + !extent_zeroed_get(extent)); + + malloc_mutex_lock(tsdn, &eset->mtx); + + extent_szind_set(extent, SC_NSIZES); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } + + assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent), true) == extent); + + if (!eset->delay_coalesce) { + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent, NULL, growing_retained); + } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + assert(eset == &arena->eset_dirty); + /* Always coalesce large eset eagerly. */ + bool coalesced; + do { + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, extent, &coalesced, + growing_retained); + } while (coalesced); + if (extent_size_get(extent) >= oversize_threshold) { + /* Shortcut to purge the oversize extent eagerly. */ + malloc_mutex_unlock(tsdn, &eset->mtx); + arena_decay_extent(tsdn, arena, ehooks, extent); + return; + } + } + extent_deactivate_locked(tsdn, arena, eset, extent); + + malloc_mutex_unlock(tsdn, &eset->mtx); +} + +void +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { + ehooks_t *ehooks = arena_get_ehooks(arena); + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (extent_register(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + return; + } + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); +} + +static bool +extent_may_dalloc(void) { + /* With retain enabled, the default dalloc always fails. */ + return !opt_retain; +} + +static bool +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + bool err; + + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_addr_set(extent, extent_base_get(extent)); + + /* Try to deallocate. 
*/ + err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); + + if (!err) { + extent_dalloc(tsdn, arena, extent); + } + + return err; +} + +void +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + assert(extent_dumpable_get(extent)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Avoid calling the default extent_dalloc unless have to. */ + if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { + /* + * Deregister first to avoid a race with other allocating + * threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { + return; + } + extent_reregister(tsdn, extent); + } + + /* Try to decommit; purge if that fails. */ + bool zeroed; + if (!extent_committed_get(extent)) { + zeroed = true; + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent))) { + zeroed = true; + } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { + zeroed = true; + } else if (extent_state_get(extent) == extent_state_muzzy || + !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { + zeroed = false; + } else { + zeroed = false; + } + extent_zeroed_set(extent, zeroed); + + if (config_prof) { + extent_gdump_sub(tsdn, extent); + } + + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, + false); +} + +void +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Deregister first to avoid a race with other allocating threads. */ + extent_deregister(tsdn, extent); + + extent_addr_set(extent, extent_base_get(extent)); + + /* Try to destroy; silently fail otherwise. */ + ehooks_destroy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); + + extent_dalloc(tsdn, arena, extent); +} + +static bool +extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + extent_committed_set(extent, extent_committed_get(extent) || !err); + return err; +} + +bool +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, + size_t length) { + return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + false); +} + +bool +extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + extent_committed_set(extent, extent_committed_get(extent) && err); + return err; +} + +static bool +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + return err; +} + +bool +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + length, false); +} + +static bool +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + return err; +} + +bool +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, extent, + offset, length, false); +} + +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). + */ +static extent_t * +extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { + assert(extent_size_get(extent) == size_a + size_b); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + + if (ehooks_split_will_fail(ehooks)) { + return NULL; + } + + extent_t *trail = extent_alloc(tsdn, arena); + if (trail == NULL) { + goto label_error_a; + } + + extent_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, + slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_dumpable_get(extent), EXTENT_NOT_HEAD); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; + { + extent_t lead; + + extent_init(&lead, arena_ind_get(arena), + extent_addr_get(extent), size_a, + slab_a, szind_a, extent_sn_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); + + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, + true, &lead_elm_a, &lead_elm_b); + } + rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, + &trail_elm_a, &trail_elm_b); + + if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL + || trail_elm_b == NULL) { + goto label_error_b; + } + + extent_lock2(tsdn, extent, trail); + + bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), + size_a + size_b, size_a, size_b, extent_committed_get(extent), + arena_ind_get(arena)); + + if (err) { + goto label_error_c; + } + + extent_size_set(extent, size_a); + extent_szind_set(extent, szind_a); + + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, + szind_a, slab_a); + extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, + szind_b, slab_b); + + extent_unlock2(tsdn, extent, trail); + + return trail; +label_error_c: + extent_unlock2(tsdn, extent, trail); +label_error_b: + extent_dalloc(tsdn, arena, trail); +label_error_a: + return NULL; +} + +extent_t * +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); +} + +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +bool +extent_head_no_merge(extent_t *a, extent_t *b) { + assert(extent_base_get(a) < extent_base_get(b)); + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). + */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (extent_is_head_get(b)) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. + */ + assert(extent_sn_comp(a, b) != 0); + return true; + } + assert(extent_sn_comp(a, b) == 0); + + return false; +} + +static bool +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, + extent_t *b, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + assert(extent_base_get(a) < extent_base_get(b)); + + if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { + return true; + } + + bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); + + if (err) { + return true; + } + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than extent_{,de}register() to do things in the right order. + */ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, + &a_elm_b); + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, + &b_elm_b); + + extent_lock2(tsdn, a, b); + + if (a_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, + SC_NSIZES, false); + } + if (b_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, + SC_NSIZES, false); + } else { + b_elm_b = b_elm_a; + } + + extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_szind_set(a, SC_NSIZES); + extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? + extent_sn_get(a) : extent_sn_get(b)); + extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, + false); + + extent_unlock2(tsdn, a, b); + + /* + * If we got here, we merged the extents; so they must be from the same + * arena (i.e. this one). + */ + assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + extent_dalloc(tsdn, arena, b); + + return false; +} + +bool +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, ehooks, a, b, false); +} + +bool +extent_boot(void) { + if (rtree_new(&extents_rtree, true)) { + return true; + } + + if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", + WITNESS_RANK_EXTENT_POOL)) { + return true; + } + + if (have_dss) { + extent_dss_boot(); + } + + return false; +} + +void +extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + 
const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + malloc_mutex_unlock(tsdn, &bin->lock); +} -- cgit v0.12 From 403f2d1664acfae920e8e6ce51e2695d826a0628 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 09:44:59 -0800 Subject: Extents: Split out introspection functionality. This isn't really part of the core extent allocation facilities. Especially as this module grows, having it in its own place may come in handy. --- Makefile.in | 3 +- include/jemalloc/internal/extent2.h | 28 --- include/jemalloc/internal/inspect.h | 40 ++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/ctl.c | 20 +- src/extent2.c | 75 ------- src/inspect.c | 77 +++++++ test/unit/extent_util.c | 267 ------------------------- test/unit/inspect.c | 267 +++++++++++++++++++++++++ 10 files changed, 399 insertions(+), 380 deletions(-) create mode 100644 include/jemalloc/internal/inspect.h create mode 100644 src/inspect.c delete mode 100644 test/unit/extent_util.c create mode 100644 test/unit/inspect.c diff --git a/Makefile.in b/Makefile.in index 29977bc..cab4e1f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,6 +112,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ $(srcroot)src/hook.c \ + $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ @@ -189,11 +190,11 @@ TESTS_UNIT := \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ - $(srcroot)test/unit/extent_util.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/huge.c \ + $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 22035bb..7a18a61 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -18,28 +18,6 @@ */ /* - * The following two structs are for experimental purposes. See - * experimental_utilization_query_ctl and - * experimental_utilization_batch_query_ctl in src/ctl.c. 
- */ -typedef struct extent_util_stats_s extent_util_stats_t; -struct extent_util_stats_s { - size_t nfree; - size_t nregs; - size_t size; -}; - -typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; -struct extent_util_stats_verbose_s { - void *slabcur_addr; - size_t nfree; - size_t nregs; - size_t size; - size_t bin_nfree; - size_t bin_nregs; -}; - -/* * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) * is the max ratio between the size of the active extent and the new extent. */ @@ -83,10 +61,4 @@ bool extent_head_no_merge(extent_t *a, extent_t *b); bool extent_boot(void); -void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size); -void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); - #endif /* JEMALLOC_INTERNAL_EXTENT2_H */ diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h new file mode 100644 index 0000000..65fef51 --- /dev/null +++ b/include/jemalloc/internal/inspect.h @@ -0,0 +1,40 @@ +#ifndef JEMALLOC_INTERNAL_INSPECT_H +#define JEMALLOC_INTERNAL_INSPECT_H + +/* + * This module contains the heap introspection capabilities. For now they are + * exposed purely through mallctl APIs in the experimental namespace, but this + * may change over time. + */ + +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. + */ +typedef struct inspect_extent_util_stats_s inspect_extent_util_stats_t; +struct inspect_extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +typedef struct inspect_extent_util_stats_verbose_s + inspect_extent_util_stats_verbose_t; + +struct inspect_extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + +void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + +#endif /* JEMALLOC_INTERNAL_INSPECT_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 4118b91..f5069d3 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -52,6 +52,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ed3b524..19e72d4 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -52,6 +52,7 @@ + diff --git a/src/ctl.c b/src/ctl.c index a9982ca..4aa4af8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/inspect.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sc.h" @@ -3258,11 +3259,11 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - assert(sizeof(extent_util_stats_verbose_t) + assert(sizeof(inspect_extent_util_stats_verbose_t) == 
sizeof(void *) + sizeof(size_t) * 5); if (oldp == NULL || oldlenp == NULL - || *oldlenp != sizeof(extent_util_stats_verbose_t) + || *oldlenp != sizeof(inspect_extent_util_stats_verbose_t) || newp == NULL) { ret = EINVAL; goto label_return; @@ -3270,9 +3271,9 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, void *ptr = NULL; WRITE(ptr, void *); - extent_util_stats_verbose_t *util_stats - = (extent_util_stats_verbose_t *)oldp; - extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, + inspect_extent_util_stats_verbose_t *util_stats + = (inspect_extent_util_stats_verbose_t *)oldp; + inspect_extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, &util_stats->nfree, &util_stats->nregs, &util_stats->size, &util_stats->bin_nfree, &util_stats->bin_nregs, &util_stats->slabcur_addr); @@ -3383,21 +3384,22 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3); + assert(sizeof(inspect_extent_util_stats_t) == sizeof(size_t) * 3); const size_t len = newlen / sizeof(const void *); if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0 || newlen != len * sizeof(const void *) - || *oldlenp != len * sizeof(extent_util_stats_t)) { + || *oldlenp != len * sizeof(inspect_extent_util_stats_t)) { ret = EINVAL; goto label_return; } void **ptrs = (void **)newp; - extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp; + inspect_extent_util_stats_t *util_stats = + (inspect_extent_util_stats_t *)oldp; size_t i; for (i = 0; i < len; ++i) { - extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], + inspect_extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], &util_stats[i].nfree, &util_stats[i].nregs, &util_stats[i].size); } diff --git a/src/extent2.c b/src/extent2.c index 4865beb..0b09716 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1661,78 +1661,3 @@ extent_boot(void) { return false; } - -void -extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = 0; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = 0; - *nregs = 1; - } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - } -} - -void -extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL - && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; - *slabcur_addr = NULL; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = *bin_nfree = *bin_nregs = 0; - *nregs = 1; - *slabcur_addr = NULL; - return; - } - - *nfree = extent_nfree_get(extent); - const szind_t szind = extent_szind_get(extent); - *nregs = bin_infos[szind].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - - const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); - 
assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; - - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - *bin_nregs = *nregs * bin->stats.curslabs; - assert(*bin_nregs >= bin->stats.curregs); - *bin_nfree = *bin_nregs - bin->stats.curregs; - } else { - *bin_nfree = *bin_nregs = 0; - } - extent_t *slab; - if (bin->slabcur != NULL) { - slab = bin->slabcur; - } else { - slab = extent_heap_first(&bin->slabs_nonfull); - } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; - malloc_mutex_unlock(tsdn, &bin->lock); -} diff --git a/src/inspect.c b/src/inspect.c new file mode 100644 index 0000000..435016e --- /dev/null +++ b/src/inspect.c @@ -0,0 +1,77 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +void +inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, + size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree, + size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + malloc_mutex_unlock(tsdn, &bin->lock); +} diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c deleted file mode 100644 index 4de0b04..0000000 --- a/test/unit/extent_util.c +++ /dev/null @@ -1,267 +0,0 @@ -#include "test/jemalloc_test.h" - -#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." 
node, \ - a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ - "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content touched when given invalid arguments"); \ -} while (0) - -#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ - TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) -#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ - TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) - -#define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - out, &out_sz, in, in_sz), 0, \ - "Should return 0 on correct arguments"); \ - assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content should be changed"); \ -} while (0) - -#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") - -#define TEST_MAX_SIZE (1 << 20) - -TEST_BEGIN(test_query) { - size_t sz; - /* - * Select some sizes that can span both small and large sizes, and are - * numerically unrelated to any size boundaries. - */ - for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { - void *p = mallocx(sz, 0); - void **in = &p; - size_t in_sz = sizeof(const void *); - size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; - void *out = mallocx(out_sz, 0); - void *out_ref = mallocx(out_sz, 0); - size_t out_sz_ref = out_sz; - - assert_ptr_not_null(p, - "test pointer allocation failed"); - assert_ptr_not_null(out, - "test output allocation failed"); - assert_ptr_not_null(out_ref, - "test reference output allocation failed"); - -#define SLABCUR_READ(out) (*(void **)out) -#define COUNTS(out) ((size_t *)((void **)out + 1)) -#define NFREE_READ(out) COUNTS(out)[0] -#define NREGS_READ(out) COUNTS(out)[1] -#define SIZE_READ(out) COUNTS(out)[2] -#define BIN_NFREE_READ(out) COUNTS(out)[3] -#define BIN_NREGS_READ(out) COUNTS(out)[4] - - SLABCUR_READ(out) = NULL; - NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; - BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; - memcpy(out_ref, out, out_sz); - - /* Test invalid argument(s) errors */ - TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); - in_sz -= 1; - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid newlen"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid *oldlenp"); - out_sz_ref = out_sz += 2 * sizeof(size_t); - - /* Examine output for valid call */ - TEST_UTIL_VALID("query"); - assert_zu_le(sz, SIZE_READ(out), - "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, - "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out), NREGS_READ(out), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out), SIZE_READ(out), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out), 0, - "Extent region count must be positive"); - assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) - != NULL && SLABCUR_READ(out) <= p), - "Allocation should follow first fit principle"); - if (config_stats) { - assert_zu_le(BIN_NFREE_READ(out), - BIN_NREGS_READ(out), - "Bin free count exceeded region 
count"); - assert_zu_ne(BIN_NREGS_READ(out), 0, - "Bin region count must be positive"); - assert_zu_le(NFREE_READ(out), - BIN_NFREE_READ(out), - "Extent free count exceeded bin free count"); - assert_zu_le(NREGS_READ(out), - BIN_NREGS_READ(out), - "Extent region count exceeded " - "bin region count"); - assert_zu_eq(BIN_NREGS_READ(out) - % NREGS_READ(out), 0, - "Bin region count isn't a multiple of " - "extent region count"); - assert_zu_le( - BIN_NFREE_READ(out) - NFREE_READ(out), - BIN_NREGS_READ(out) - NREGS_READ(out), - "Free count in other extents in the bin " - "exceeded region count in other extents " - "in the bin"); - assert_zu_le(NREGS_READ(out) - NFREE_READ(out), - BIN_NREGS_READ(out) - BIN_NFREE_READ(out), - "Extent utilized count exceeded " - "bin utilized count"); - } - } else { - assert_zu_eq(NFREE_READ(out), 0, - "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out), 1, - "Extent region count should be one"); - assert_ptr_null(SLABCUR_READ(out), - "Current slab must be null for large size classes"); - if (config_stats) { - assert_zu_eq(BIN_NFREE_READ(out), 0, - "Bin free count must be zero for " - "large sizes"); - assert_zu_eq(BIN_NREGS_READ(out), 0, - "Bin region count must be zero for " - "large sizes"); - } - } - -#undef BIN_NREGS_READ -#undef BIN_NFREE_READ -#undef SIZE_READ -#undef NREGS_READ -#undef NFREE_READ -#undef COUNTS -#undef SLABCUR_READ - - free(out_ref); - free(out); - free(p); - } -} -TEST_END - -TEST_BEGIN(test_batch) { - size_t sz; - /* - * Select some sizes that can span both small and large sizes, and are - * numerically unrelated to any size boundaries. - */ - for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) { - void *p = mallocx(sz, 0); - void *q = mallocx(sz, 0); - void *in[] = {p, q}; - size_t in_sz = sizeof(const void *) * 2; - size_t out[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz = sizeof(size_t) * 6; - size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz_ref = out_sz; - - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(q, "test pointer allocation failed"); - - /* Test invalid argument(s) errors */ - TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); - in_sz -= 1; - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "newlen is not an exact multiple"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp is not an exact multiple"); - out_sz_ref = out_sz += 2 * sizeof(size_t); - in_sz -= sizeof(const void *); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp and newlen do not match"); - in_sz += sizeof(const void *); - - /* Examine output for valid calls */ -#define TEST_EQUAL_REF(i, message) \ - assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) - -#define NFREE_READ(out, i) out[(i) * 3] -#define NREGS_READ(out, i) out[(i) * 3 + 1] -#define SIZE_READ(out, i) out[(i) * 3 + 2] - - out_sz_ref = out_sz /= 2; - in_sz /= 2; - TEST_UTIL_BATCH_VALID; - assert_zu_le(sz, SIZE_READ(out, 0), - "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, - "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 
0), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out, 0), 0, - "Extent region count must be positive"); - } else { - assert_zu_eq(NFREE_READ(out, 0), 0, - "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out, 0), 1, - "Extent region count should be one"); - } - TEST_EQUAL_REF(1, - "Should not overwrite content beyond what's needed"); - in_sz *= 2; - out_sz_ref = out_sz *= 2; - - memcpy(out_ref, out, 3 * sizeof(size_t)); - TEST_UTIL_BATCH_VALID; - TEST_EQUAL_REF(0, "Statistics should be stable across calls"); - if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), - "Extent free count exceeded region count"); - } else { - assert_zu_eq(NFREE_READ(out, 0), 0, - "Extent free count should be zero"); - } - assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), - "Extent region count should be same for same region size"); - assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), - "Extent size should be same for same region size"); - -#undef SIZE_READ -#undef NREGS_READ -#undef NFREE_READ - -#undef TEST_EQUAL_REF - - free(q); - free(p); - } -} -TEST_END - -int -main(void) { - assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, - "Test case cannot cover large classes"); - return test(test_query, test_batch); -} diff --git a/test/unit/inspect.c b/test/unit/inspect.c new file mode 100644 index 0000000..4de0b04 --- /dev/null +++ b/test/unit/inspect.c @@ -0,0 +1,267 @@ +#include "test/jemalloc_test.h" + +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ + assert_d_eq(mallctl("experimental.utilization." node, \ + a, b, c, d), EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ +} while (0) + +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) + +#define TEST_UTIL_VALID(node) do { \ + assert_d_eq(mallctl("experimental.utilization." node, \ + out, &out_sz, in, in_sz), 0, \ + "Should return 0 on correct arguments"); \ + assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ +} while (0) + +#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") + +#define TEST_MAX_SIZE (1 << 20) + +TEST_BEGIN(test_query) { + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 
1009 : 99989)) { + void *p = mallocx(sz, 0); + void **in = &p; + size_t in_sz = sizeof(const void *); + size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, + "test pointer allocation failed"); + assert_ptr_not_null(out, + "test output allocation failed"); + assert_ptr_not_null(out_ref, + "test reference output allocation failed"); + +#define SLABCUR_READ(out) (*(void **)out) +#define COUNTS(out) ((size_t *)((void **)out + 1)) +#define NFREE_READ(out) COUNTS(out)[0] +#define NREGS_READ(out) COUNTS(out)[1] +#define SIZE_READ(out) COUNTS(out)[2] +#define BIN_NFREE_READ(out) COUNTS(out)[3] +#define BIN_NREGS_READ(out) COUNTS(out)[4] + + SLABCUR_READ(out) = NULL; + NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; + BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; + memcpy(out_ref, out, out_sz); + + /* Test invalid argument(s) errors */ + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid newlen"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid *oldlenp"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + + /* Examine output for valid call */ + TEST_UTIL_VALID("query"); + assert_zu_le(sz, SIZE_READ(out), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out), NREGS_READ(out), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out), SIZE_READ(out), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out), 0, + "Extent region count must be positive"); + assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) + != NULL && SLABCUR_READ(out) <= p), + "Allocation should follow first fit principle"); + if (config_stats) { + assert_zu_le(BIN_NFREE_READ(out), + BIN_NREGS_READ(out), + "Bin free count exceeded region count"); + assert_zu_ne(BIN_NREGS_READ(out), 0, + "Bin region count must be positive"); + assert_zu_le(NFREE_READ(out), + BIN_NFREE_READ(out), + "Extent free count exceeded bin free count"); + assert_zu_le(NREGS_READ(out), + BIN_NREGS_READ(out), + "Extent region count exceeded " + "bin region count"); + assert_zu_eq(BIN_NREGS_READ(out) + % NREGS_READ(out), 0, + "Bin region count isn't a multiple of " + "extent region count"); + assert_zu_le( + BIN_NFREE_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - NREGS_READ(out), + "Free count in other extents in the bin " + "exceeded region count in other extents " + "in the bin"); + assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - BIN_NFREE_READ(out), + "Extent utilized count exceeded " + "bin utilized count"); + } + } else { + assert_zu_eq(NFREE_READ(out), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out), 1, + "Extent region count should be one"); + assert_ptr_null(SLABCUR_READ(out), + "Current slab must be null for large size classes"); + if (config_stats) { + assert_zu_eq(BIN_NFREE_READ(out), 0, + "Bin free count must be zero for " + "large sizes"); + assert_zu_eq(BIN_NREGS_READ(out), 0, + "Bin region count must 
be zero for " + "large sizes"); + } + } + +#undef BIN_NREGS_READ +#undef BIN_NFREE_READ +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ +#undef COUNTS +#undef SLABCUR_READ + + free(out_ref); + free(out); + free(p); + } +} +TEST_END + +TEST_BEGIN(test_batch) { + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) { + void *p = mallocx(sz, 0); + void *q = mallocx(sz, 0); + void *in[] = {p, q}; + size_t in_sz = sizeof(const void *) * 2; + size_t out[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz = sizeof(size_t) * 6; + size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); + + /* Test invalid argument(s) errors */ + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "newlen is not an exact multiple"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp is not an exact multiple"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + in_sz -= sizeof(const void *); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp and newlen do not match"); + in_sz += sizeof(const void *); + + /* Examine output for valid calls */ +#define TEST_EQUAL_REF(i, message) \ + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + +#define NFREE_READ(out, i) out[(i) * 3] +#define NREGS_READ(out, i) out[(i) * 3 + 1] +#define SIZE_READ(out, i) out[(i) * 3 + 2] + + out_sz_ref = out_sz /= 2; + in_sz /= 2; + TEST_UTIL_BATCH_VALID; + assert_zu_le(sz, SIZE_READ(out, 0), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out, 0), 0, + "Extent region count must be positive"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out, 0), 1, + "Extent region count should be one"); + } + TEST_EQUAL_REF(1, + "Should not overwrite content beyond what's needed"); + in_sz *= 2; + out_sz_ref = out_sz *= 2; + + memcpy(out_ref, out, 3 * sizeof(size_t)); + TEST_UTIL_BATCH_VALID; + TEST_EQUAL_REF(0, "Statistics should be stable across calls"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + "Extent free count exceeded region count"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + } + assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + "Extent region count should be same for same region size"); + assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + "Extent size should be same for same region size"); + +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ + +#undef TEST_EQUAL_REF + + free(q); + free(p); + } +} +TEST_END + 
+int +main(void) { + assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + "Test case cannot cover large classes"); + return test(test_query, test_batch); +} -- cgit v0.12 From ebbb973271e26175c832a6ec5dfc515e7473a9af Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 14:05:14 -0800 Subject: Base: Remove some unnecessary reentrancy guards. The ehooks module will now call these if necessary. --- src/base.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/base.c b/src/base.c index a1b45d0..79736cd 100644 --- a/src/base.c +++ b/src/base.c @@ -79,25 +79,21 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. */ not_reached(); } else { - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, NULL); if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } /* Nothing worked. That's the application's problem. */ - label_post_reentrancy: - post_reentrancy(tsd); } label_done: if (metadata_thp_madvise()) { -- cgit v0.12 From d0f187ad3b2ea2e457a05217da4be23db5d915a5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 14:42:10 -0800 Subject: Arena: Loosen arena_may_have_muzzy restrictions. If there are custom extent hooks, pages_can_purge_lazy is not necessarily the right guard. We could check ehooks_are_default too, but the case where purge_lazy is unsupported is rare and getting rarer. Just checking the decay interval captures most of the benefit. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 214a97c..2d46b9e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -420,7 +420,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, static bool arena_may_have_muzzy(arena_t *arena) { - return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0)); + return arena_muzzy_decay_ms_get(arena) != 0; } extent_t * -- cgit v0.12 From 4b2e5ee8b9989a84a5c3665bada0973ab351d3d9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 17:55:24 -0800 Subject: Ehooks: Add a "zero" ehook. This is the first API expansion. It lets the hooks pick where and how to purge within themselves. --- include/jemalloc/internal/ehooks.h | 18 ++++++++++++++++++ src/ehooks.c | 17 +++++++++++++++++ src/extent2.c | 38 +++++++++++++------------------------- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 97c3f44..734cd18 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -43,6 +43,7 @@ bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +void ehooks_default_zero_impl(void *addr, size_t size); /* * We don't officially support reentrancy from wtihin the extent hooks. 
But @@ -261,4 +262,21 @@ ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, } } +static inline void +ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_zero_impl(addr, size); + } else { + /* + * It would be correct to try using the user-provided purge + * hooks (since they are required to have zeroed the extent if + * they indicate success), but we don't necessarily know their + * cost. We'll be conservative and use memset. + */ + memset(addr, 0, size); + } +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/src/ehooks.c b/src/ehooks.c index d7d1613..25aef1c 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -209,6 +209,23 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, return ehooks_default_merge_impl(addr_a, addr_b); } +void +ehooks_default_zero_impl(void *addr, size_t size) { + /* + * By default, we try to zero out memory using OS-provided demand-zeroed + * pages. If the user has specifically requested hugepages, though, we + * don't want to purge in the middle of a hugepage (which would break it + * up), so we act conservatively and use memset. + */ + bool needs_memset = true; + if (opt_thp != thp_mode_always) { + needs_memset = pages_purge_forced(addr, size); + } + if (needs_memset) { + memset(addr, 0, size); + } +} + const extent_hooks_t ehooks_default_extent_hooks = { ehooks_default_alloc, ehooks_default_dalloc, diff --git a/src/extent2.c b/src/extent2.c index 0b09716..55f72df 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -758,17 +758,6 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, unreachable(); } -static bool -extent_need_manual_zero(arena_t *arena) { - /* - * Need to manually zero the extent on repopulating if either; 1) non - * default extent hooks installed (in which case the purge semantics may - * change); or 2) transparent huge pages enabled. - */ - return (!ehooks_are_default(arena_get_ehooks(arena)) || - (opt_thp == thp_mode_always)); -} - /* * Tries to satisfy the given allocation request by reusing one of the extents * in the given eset_t. @@ -807,9 +796,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, growing_retained); return NULL; } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } } if (extent_committed_get(extent)) { @@ -832,11 +818,10 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *addr = extent_base_get(extent); if (!extent_zeroed_get(extent)) { size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } else if (config_debug) { + ehooks_zero(tsdn, ehooks, addr, size, + arena_ind_get(arena)); + } + if (config_debug) { size_t *p = (size_t *)(uintptr_t)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -960,8 +945,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &arena->eset_retained, extent, true); goto label_err; } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); + /* A successful commit should return zeroed memory. */ + if (config_debug) { + void *addr = extent_addr_get(extent); + size_t *p = (size_t *)(uintptr_t)addr; + /* Check the first page only. 
*/ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } } } @@ -996,10 +987,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*zero && !extent_zeroed_get(extent)) { void *addr = extent_base_get(extent); size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } + ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } return extent; -- cgit v0.12 From a738a66b5c43849eb90deef11b391641ce382aa0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 5 Dec 2019 17:09:44 -0800 Subject: Ehooks: Add some debug zero and addr checks. These help make sure that the ehooks return properly zeroed memory when required to. --- include/jemalloc/internal/ehooks.h | 75 +++++++++++++++++++++++++++++++++----- src/extent2.c | 7 ---- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 734cd18..c046cd1 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -106,18 +106,63 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; } +/* + * Some hooks are required to return zeroed memory in certain situations. In + * debug mode, we do some heuristic checks that they did what they were supposed + * to. + * + * This isn't really ehooks-specific (i.e. anyone can check for zeroed memory). + * But incorrect zero information indicates an ehook bug. + */ +static inline void +ehooks_debug_zero_check(void *addr, size_t size) { + assert(((uintptr_t)addr & PAGE_MASK) == 0); + assert((size & PAGE_MASK) == 0); + assert(size > 0); + if (config_debug) { + /* Check the whole first page. */ + size_t *p = (size_t *)addr; + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } + /* + * And 4 spots within. There's a tradeoff here; the larger + * this number, the more likely it is that we'll catch a bug + * where ehooks return a sparsely non-zero range. But + * increasing the number of checks also increases the number of + * page faults in debug mode. FreeBSD does much of their + * day-to-day development work in debug mode, so we don't want + * even the debug builds to be too slow. 
+ */ + const size_t nchecks = 4; + assert(PAGE >= sizeof(size_t) * nchecks); + for (size_t i = 0; i < nchecks; ++i) { + assert(p[i * (size / sizeof(size_t) / nchecks)] == 0); + } + } +} + + static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + bool orig_zero = *zero; + void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_alloc_impl(tsdn, new_addr, size, + ret = ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, zero, commit, arena_ind); + } else { + ehooks_pre_reentrancy(tsdn); + ret = extent_hooks->alloc(extent_hooks, new_addr, size, + alignment, zero, commit, arena_ind); + ehooks_post_reentrancy(tsdn); + } + assert(new_addr == NULL || ret == NULL || new_addr == ret); + assert(!orig_zero || *zero); + if (*zero && ret != NULL) { + ehooks_debug_zero_check(ret, size); } - ehooks_pre_reentrancy(tsdn); - void *ret = extent_hooks->alloc(extent_hooks, new_addr, size, alignment, - zero, commit, arena_ind); - ehooks_post_reentrancy(tsdn); return ret; } @@ -158,17 +203,21 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + bool err; if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_commit_impl(addr, offset, length); + err = ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { - return true; + err = true; } else { ehooks_pre_reentrancy(tsdn); - bool err = extent_hooks->commit(extent_hooks, addr, size, + err = extent_hooks->commit(extent_hooks, addr, size, offset, length, arena_ind); ehooks_post_reentrancy(tsdn); - return err; } + if (!err) { + ehooks_debug_zero_check(addr, size); + } + return err; } static inline bool @@ -212,6 +261,14 @@ static inline bool ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + /* + * It would be correct to have a ehooks_debug_zero_check call at the end + * of this function; purge_forced is required to zero. But checking + * would touch the page in question, which may have performance + * consequences (imagine the hooks are using hugepages, with a global + * zero page off). Even in debug mode, it's usually a good idea to + * avoid cases that can dramatically increase memory consumption. + */ #ifdef PAGES_CAN_PURGE_FORCED if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_forced_impl(addr, offset, length); diff --git a/src/extent2.c b/src/extent2.c index 55f72df..4001d17 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -821,13 +821,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } - if (config_debug) { - size_t *p = (size_t *)(uintptr_t)addr; - /* Check the first page only. */ - for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { - assert(p[i] == 0); - } - } } return extent; } -- cgit v0.12 From 865debda2276fee0257c90678bafd1bd2f73df6a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Dec 2019 10:41:25 -0800 Subject: Rename extent.h -> edata.h. This name is slightly pithier; a full-on rename will come shortly. 
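For consumers of the header the change is purely mechanical, as the hunks
below show; e.g. base_structs.h and bin.h simply switch

    -#include "jemalloc/internal/extent.h"
    +#include "jemalloc/internal/edata.h"

while the copied header contents are left untouched for now (626 lines out,
626 lines back in).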
--- Makefile.in | 2 +- include/jemalloc/internal/base_structs.h | 2 +- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/edata.h | 626 +++++++++++++++++++++++++ include/jemalloc/internal/eset.h | 2 +- include/jemalloc/internal/extent.h | 626 ------------------------- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 +- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 2 +- src/edata.c | 6 + src/extent.c | 6 - 10 files changed, 638 insertions(+), 638 deletions(-) create mode 100644 include/jemalloc/internal/edata.h delete mode 100644 include/jemalloc/internal/extent.h create mode 100644 src/edata.c delete mode 100644 src/extent.c diff --git a/Makefile.in b/Makefile.in index cab4e1f..86a51cc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,9 +104,9 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/edata.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ - $(srcroot)src/extent.c \ $(srcroot)src/extent2.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 68e7896..1097892 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_BASE_STRUCTS_H #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 92e8122..8cc7fed 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h new file mode 100644 index 0000000..2fd6e90 --- /dev/null +++ b/include/jemalloc/internal/edata.h @@ -0,0 +1,626 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_H +#define JEMALLOC_INTERNAL_EXTENT_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/slab_data.h" +#include "jemalloc/internal/sz.h" + +enum extent_state_e { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, + extent_state_retained = 3 +}; +typedef enum extent_state_e extent_state_t; + +enum extent_head_state_e { + EXTENT_NOT_HEAD, + EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ +}; +typedef enum extent_head_state_e extent_head_state_t; + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +typedef struct extent_s extent_t; +typedef ql_head(extent_t) extent_list_t; +typedef ph(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; +struct extent_s { + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * d: dumpable + * z: zeroed + * t: state + * i: szind + * f: nfree + * s: bin_shard + * n: sn + * + * nnnnnnnn ... 
nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). + * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to sz_large_pad or promotion of + * sampled small regions. + * + * nfree: Number of free regions in slab. + * + * bin_shard: the shard of the bin from which this extent came. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. 
+ */ + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_BINSHARD_WIDTH 6 +#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) + +#define EXTENT_BITS_IS_HEAD_WIDTH 1 +#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; + + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ + phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + slab_data_t e_slab_data; + + /* Profiling data, used for large objects. */ + struct { + /* Time when this was allocated. */ + nstime_t e_alloc_time; + /* Points to a prof_tctx_t. 
*/ + atomic_p_t e_prof_tctx; + }; + }; +}; + +static inline unsigned +extent_arena_ind_get(const extent_t *extent) { + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + assert(arena_ind < MALLOCX_ARENA_LIMIT); + + return arena_ind; +} + +static inline szind_t +extent_szind_get_maybe_invalid(const extent_t *extent) { + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= SC_NSIZES); + return szind; +} + +static inline szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < SC_NSIZES); /* Never call when "invalid". */ + return szind; +} + +static inline size_t +extent_usize_get(const extent_t *extent) { + return sz_index2size(extent_szind_get(extent)); +} + +static inline unsigned +extent_binshard_get(const extent_t *extent) { + unsigned binshard = (unsigned)((extent->e_bits & + EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + return binshard; +} + +static inline size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +static inline extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +static inline bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +static inline bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool +extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +static inline unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + +static inline void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +static inline void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +static inline size_t +extent_size_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +static inline size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +static inline size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; +} + +static inline void * +extent_before_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +} + +static inline void * +extent_last_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); +} + +static inline void * +extent_past_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); +} + +static inline slab_data_t * +extent_slab_data_get(extent_t *extent) { 
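The e_size_esn union above relies on the extent size being a multiple of the page size, so the low page-offset bits are free to hold the extent serial number (ESN). Here is a minimal sketch of that co-packing, with a hypothetical 4 KiB DEMO_PAGE standing in for PAGE; it is an illustration only, not part of the patch.

#include <assert.h>
#include <stddef.h>

#define DEMO_PAGE	((size_t)4096)
#define DEMO_SIZE_MASK	((size_t)~(DEMO_PAGE - 1))
#define DEMO_ESN_MASK	((size_t)(DEMO_PAGE - 1))

static inline size_t
demo_size_get(size_t size_esn) {
	return size_esn & DEMO_SIZE_MASK;
}

static inline size_t
demo_esn_get(size_t size_esn) {
	return size_esn & DEMO_ESN_MASK;
}

static inline size_t
demo_size_set(size_t size_esn, size_t size) {
	assert((size & ~DEMO_SIZE_MASK) == 0);	/* size must be page-aligned */
	return size | (size_esn & ~DEMO_SIZE_MASK);
}

static inline size_t
demo_esn_set(size_t size_esn, size_t esn) {
	return (size_esn & ~DEMO_ESN_MASK) | (esn & DEMO_ESN_MASK);
}

int
main(void) {
	size_t size_esn = 0;
	size_esn = demo_size_set(size_esn, 8 * DEMO_PAGE);
	size_esn = demo_esn_set(size_esn, 123);
	assert(demo_size_get(size_esn) == 8 * DEMO_PAGE);
	assert(demo_esn_get(size_esn) == 123);
	return 0;
}

The e_bsize member of the same union covers base extents, whose sizes need not be page multiples, which is why it bypasses the masking entirely.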
+ assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline const slab_data_t * +extent_slab_data_get_const(const extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline void +extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( + &extent->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = extent->e_alloc_time; +} + +static inline void +extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +} + +static inline void +extent_binshard_set(extent_t *extent, unsigned binshard) { + /* The assertion assumes szind is set already. */ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); +} + +static inline void +extent_addr_set(extent_t *extent, void *addr) { + extent->e_addr = addr; +} + +static inline void +extent_size_set(extent_t *extent, size_t size) { + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +static inline void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +static inline void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; +} + +static inline void +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); +} + +static inline void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { + /* The assertion assumes szind is set already. 
*/ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & + (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_sub(extent_t *extent, uint64_t n) { + assert(extent_slab_get(extent)); + extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_sn_set(extent_t *extent, size_t sn) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +} + +static inline void +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +} + +static inline void +extent_zeroed_set(extent_t *extent, bool zeroed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +} + +static inline void +extent_committed_set(extent_t *extent, bool committed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void +extent_slab_set(extent_t *extent, bool slab) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +} + +static inline void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { + atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +} + +static inline void +extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { + nstime_copy(&extent->e_alloc_time, t); +} + +static inline bool +extent_is_head_get(extent_t *extent) { + if (maps_coalesce) { + not_reached(); + } + + return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> + EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_is_head_set(extent_t *extent, bool is_head) { + if (maps_coalesce) { + not_reached(); + } + + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed, bool dumpable, extent_head_state_t is_head) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_ind_set(extent, arena_ind); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); + extent_sn_set(extent, sn); + extent_state_set(extent, state); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); + ql_elm_new(extent, ql_link); + if (!maps_coalesce) { + extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : + false); + } + if (config_prof) { + extent_prof_tctx_set(extent, NULL); + } +} + +static inline void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, SC_NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); + extent_dumpable_set(extent, true); +} + +static inline void +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +static inline extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +static inline extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); +} + +static inline void +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + +static inline void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); +} + +static inline int +extent_sn_comp(const extent_t *a, const extent_t *b) { + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return (a_sn > b_sn) - (a_sn < b_sn); +} + +static inline int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + +static inline int +extent_ad_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return (a_addr > b_addr) - (a_addr < b_addr); +} + +static inline int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + +static inline int +extent_snad_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ad_comp(a, b); + return ret; +} + +static inline int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} + +ph_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_heap_, extent_heap_t, extent_t) + +#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index fae64c8..833f19c 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" /* diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h deleted file mode 100644 index 2fd6e90..0000000 --- a/include/jemalloc/internal/extent.h +++ /dev/null @@ -1,626 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_H -#define JEMALLOC_INTERNAL_EXTENT_H - -#include 
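The comparators above use the branch-free (a > b) - (a < b) idiom to produce a three-way result, and the snad/esnead variants compose a serial-number comparison with an address comparison so that non-unique serial numbers still yield a deterministic total order. A self-contained sketch of the same pattern, using a hypothetical demo_node_t (illustration only, not part of the patch):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
	size_t sn;	/* serial number, possibly shared by several nodes */
	void *addr;	/* address used as the tiebreaker */
} demo_node_t;

static inline int
demo_sn_comp(const demo_node_t *a, const demo_node_t *b) {
	/* Yields -1, 0, or +1 without branching on the comparison result. */
	return (a->sn > b->sn) - (a->sn < b->sn);
}

static inline int
demo_ad_comp(const demo_node_t *a, const demo_node_t *b) {
	uintptr_t a_addr = (uintptr_t)a->addr;
	uintptr_t b_addr = (uintptr_t)b->addr;
	return (a_addr > b_addr) - (a_addr < b_addr);
}

static inline int
demo_snad_comp(const demo_node_t *a, const demo_node_t *b) {
	int ret = demo_sn_comp(a, b);
	return ret != 0 ? ret : demo_ad_comp(a, b);
}

int
main(void) {
	int x, y;
	demo_node_t n1 = {5, &x};
	demo_node_t n2 = {5, &y};
	assert(demo_sn_comp(&n1, &n2) == 0);	/* equal serial numbers */
	assert(demo_snad_comp(&n1, &n2) != 0);	/* still totally ordered */
	return 0;
}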
"jemalloc/internal/atomic.h" -#include "jemalloc/internal/bin_info.h" -#include "jemalloc/internal/bit_util.h" -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/slab_data.h" -#include "jemalloc/internal/sz.h" - -enum extent_state_e { - extent_state_active = 0, - extent_state_dirty = 1, - extent_state_muzzy = 2, - extent_state_retained = 3 -}; -typedef enum extent_state_e extent_state_t; - -enum extent_head_state_e { - EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ -}; -typedef enum extent_head_state_e extent_head_state_t; - -/* Extent (span of pages). Use accessor functions for e_* fields. */ -typedef struct extent_s extent_t; -typedef ql_head(extent_t) extent_list_t; -typedef ph(extent_t) extent_tree_t; -typedef ph(extent_t) extent_heap_t; -struct extent_s { - /* - * Bitfield containing several fields: - * - * a: arena_ind - * b: slab - * c: committed - * d: dumpable - * z: zeroed - * t: state - * i: szind - * f: nfree - * s: bin_shard - * n: sn - * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa - * - * arena_ind: Arena from which this extent came, or all 1 bits if - * unassociated. - * - * slab: The slab flag indicates whether the extent is used for a slab - * of small regions. This helps differentiate small size classes, - * and it indicates whether interior pointers can be looked up via - * iealloc(). - * - * committed: The committed flag indicates whether physical memory is - * committed to the extent, whether explicitly or implicitly - * as on a system that overcommits and satisfies physical - * memory needs on demand via soft page faults. - * - * dumpable: The dumpable flag indicates whether or not we've set the - * memory in question to be dumpable. Note that this - * interacts somewhat subtly with user-specified extent hooks, - * since we don't know if *they* are fiddling with - * dumpability (in which case, we don't want to undo whatever - * they're doing). To deal with this scenario, we: - * - Make dumpable false only for memory allocated with the - * default hooks. - * - Only allow memory to go from non-dumpable to dumpable, - * and only once. - * - Never make the OS call to allow dumping when the - * dumpable bit is already set. - * These three constraints mean that we will never - * accidentally dump user memory that the user meant to set - * nondumpable with their extent hooks. - * - * - * zeroed: The zeroed flag is used by extent recycling code to track - * whether memory is zero-filled. - * - * state: The state flag is an extent_state_t. - * - * szind: The szind flag indicates usable size class index for - * allocations residing in this extent, regardless of whether the - * extent is a slab. Extent size and usable size often differ - * even for non-slabs, either due to sz_large_pad or promotion of - * sampled small regions. - * - * nfree: Number of free regions in slab. - * - * bin_shard: the shard of the bin from which this extent came. - * - * sn: Serial number (potentially non-unique). - * - * Serial numbers may wrap around if !opt_retain, but as long as - * comparison functions fall back on address comparison for equal - * serial numbers, stable (if imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of - * wrap-around, e.g. when splitting an extent and assigning the same - * serial number to both resulting adjacent extents. 
- */ - uint64_t e_bits; -#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) - -#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) - -#define EXTENT_BITS_SLAB_WIDTH 1 -#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) - -#define EXTENT_BITS_COMMITTED_WIDTH 1 -#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) - -#define EXTENT_BITS_DUMPABLE_WIDTH 1 -#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) - -#define EXTENT_BITS_ZEROED_WIDTH 1 -#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) - -#define EXTENT_BITS_STATE_WIDTH 2 -#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) - -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) - -#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) - -#define EXTENT_BITS_BINSHARD_WIDTH 6 -#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) - -#define EXTENT_BITS_IS_HEAD_WIDTH 1 -#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) - -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) - - /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; - - union { - /* - * Extent size and serial number associated with the extent - * structure (different than the serial number for the extent at - * e_addr). - * - * ssssssss [...] ssssssss ssssnnnn nnnnnnnn - */ - size_t e_size_esn; - #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) - #define EXTENT_ESN_MASK ((size_t)PAGE-1) - /* Base extent size, which may not be a multiple of PAGE. */ - size_t e_bsize; - }; - - /* - * List linkage, used by a variety of lists: - * - bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - - /* - * Linkage for per size class sn/address-ordered heaps, and - * for extent_avail - */ - phn(extent_t) ph_link; - - union { - /* Small region slab metadata. */ - slab_data_t e_slab_data; - - /* Profiling data, used for large objects. */ - struct { - /* Time when this was allocated. */ - nstime_t e_alloc_time; - /* Points to a prof_tctx_t. 
*/ - atomic_p_t e_prof_tctx; - }; - }; -}; - -static inline unsigned -extent_arena_ind_get(const extent_t *extent) { - unsigned arena_ind = (unsigned)((extent->e_bits & - EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); - assert(arena_ind < MALLOCX_ARENA_LIMIT); - - return arena_ind; -} - -static inline szind_t -extent_szind_get_maybe_invalid(const extent_t *extent) { - szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> - EXTENT_BITS_SZIND_SHIFT); - assert(szind <= SC_NSIZES); - return szind; -} - -static inline szind_t -extent_szind_get(const extent_t *extent) { - szind_t szind = extent_szind_get_maybe_invalid(extent); - assert(szind < SC_NSIZES); /* Never call when "invalid". */ - return szind; -} - -static inline size_t -extent_usize_get(const extent_t *extent) { - return sz_index2size(extent_szind_get(extent)); -} - -static inline unsigned -extent_binshard_get(const extent_t *extent) { - unsigned binshard = (unsigned)((extent->e_bits & - EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - return binshard; -} - -static inline size_t -extent_sn_get(const extent_t *extent) { - return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> - EXTENT_BITS_SN_SHIFT); -} - -static inline extent_state_t -extent_state_get(const extent_t *extent) { - return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> - EXTENT_BITS_STATE_SHIFT); -} - -static inline bool -extent_zeroed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> - EXTENT_BITS_ZEROED_SHIFT); -} - -static inline bool -extent_committed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> - EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline bool -extent_dumpable_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> - EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline bool -extent_slab_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> - EXTENT_BITS_SLAB_SHIFT); -} - -static inline unsigned -extent_nfree_get(const extent_t *extent) { - assert(extent_slab_get(extent)); - return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> - EXTENT_BITS_NFREE_SHIFT); -} - -static inline void * -extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return PAGE_ADDR2BASE(extent->e_addr); -} - -static inline void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return extent->e_addr; -} - -static inline size_t -extent_size_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_SIZE_MASK); -} - -static inline size_t -extent_esn_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_ESN_MASK); -} - -static inline size_t -extent_bsize_get(const extent_t *extent) { - return extent->e_bsize; -} - -static inline void * -extent_before_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) - PAGE); -} - -static inline void * -extent_last_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE); -} - -static inline void * -extent_past_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent)); -} - -static inline slab_data_t * -extent_slab_data_get(extent_t *extent) { 
- assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline const slab_data_t * -extent_slab_data_get_const(const extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline void -extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { - assert(prof_info != NULL); - prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( - &extent->e_prof_tctx, ATOMIC_ACQUIRE); - prof_info->alloc_time = extent->e_alloc_time; -} - -static inline void -extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); -} - -static inline void -extent_binshard_set(extent_t *extent, unsigned binshard) { - /* The assertion assumes szind is set already. */ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); -} - -static inline void -extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; -} - -static inline void -extent_size_set(extent_t *extent, size_t size) { - assert((size & ~EXTENT_SIZE_MASK) == 0); - extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); -} - -static inline void -extent_esn_set(extent_t *extent, size_t esn) { - extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & - EXTENT_ESN_MASK); -} - -static inline void -extent_bsize_set(extent_t *extent, size_t bsize) { - extent->e_bsize = bsize; -} - -static inline void -extent_szind_set(extent_t *extent, szind_t szind) { - assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | - ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); -} - -static inline void -extent_nfree_set(extent_t *extent, unsigned nfree) { - assert(extent_slab_get(extent)); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { - /* The assertion assumes szind is set already. 
*/ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & - (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_inc(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_dec(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_sub(extent_t *extent, uint64_t n) { - assert(extent_slab_get(extent)); - extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_sn_set(extent_t *extent, size_t sn) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | - ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); -} - -static inline void -extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | - ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); -} - -static inline void -extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); -} - -static inline void -extent_committed_set(extent_t *extent, bool committed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline void -extent_dumpable_set(extent_t *extent, bool dumpable) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline void -extent_slab_set(extent_t *extent, bool slab) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | - ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); -} - -static inline void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); -} - -static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { - nstime_copy(&extent->e_alloc_time, t); -} - -static inline bool -extent_is_head_get(extent_t *extent) { - if (maps_coalesce) { - not_reached(); - } - - return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> - EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_is_head_set(extent_t *extent, bool is_head) { - if (maps_coalesce) { - not_reached(); - } - - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable, extent_head_state_t is_head) { - assert(addr == PAGE_ADDR2BASE(addr) || !slab); - - extent_arena_ind_set(extent, arena_ind); - extent_addr_set(extent, addr); - extent_size_set(extent, size); - extent_slab_set(extent, slab); - extent_szind_set(extent, szind); - extent_sn_set(extent, sn); - extent_state_set(extent, state); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_dumpable_set(extent, dumpable); - ql_elm_new(extent, ql_link); - if (!maps_coalesce) { - extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : - false); - } - if (config_prof) { - extent_prof_tctx_set(extent, NULL); - } -} - -static inline void -extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); - extent_addr_set(extent, addr); - extent_bsize_set(extent, bsize); - extent_slab_set(extent, false); - extent_szind_set(extent, SC_NSIZES); - extent_sn_set(extent, sn); - extent_state_set(extent, extent_state_active); - extent_zeroed_set(extent, true); - extent_committed_set(extent, true); - extent_dumpable_set(extent, true); -} - -static inline void -extent_list_init(extent_list_t *list) { - ql_new(list); -} - -static inline extent_t * -extent_list_first(const extent_list_t *list) { - return ql_first(list); -} - -static inline extent_t * -extent_list_last(const extent_list_t *list) { - return ql_last(list, ql_link); -} - -static inline void -extent_list_append(extent_list_t *list, extent_t *extent) { - ql_tail_insert(list, extent, ql_link); -} - -static inline void -extent_list_prepend(extent_list_t *list, extent_t *extent) { - ql_head_insert(list, extent, ql_link); -} - -static inline void -extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert) { - ql_after_insert(to_remove, to_insert, ql_link); - ql_remove(list, to_remove, ql_link); -} - -static inline void -extent_list_remove(extent_list_t *list, extent_t *extent) { - ql_remove(list, extent, ql_link); -} - -static inline int -extent_sn_comp(const extent_t *a, const extent_t *b) { - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); - - return (a_sn > b_sn) - (a_sn < b_sn); -} - -static inline int -extent_esn_comp(const extent_t *a, const extent_t *b) { - size_t a_esn = extent_esn_get(a); - size_t b_esn = extent_esn_get(b); - - return (a_esn > b_esn) - (a_esn < b_esn); -} - -static inline int -extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - - return (a_addr > b_addr) - (a_addr < b_addr); -} - -static inline int -extent_ead_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_eaddr = (uintptr_t)a; - uintptr_t b_eaddr = (uintptr_t)b; - - return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); -} - -static inline int -extent_snad_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_sn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ad_comp(a, b); - return ret; -} - -static inline int -extent_esnead_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_esn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ead_comp(a, b); - return ret; -} - -ph_proto(, extent_avail_, extent_tree_t, extent_t) -ph_proto(, extent_heap_, extent_heap_t, extent_t) - -#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f5069d3..9dfc36d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,9 +44,9 @@ + - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 19e72d4..0ec4d1e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,9 +44,9 @@ + - diff --git a/src/edata.c b/src/edata.c new file mode 100644 index 0000000..1a5a1fa --- /dev/null +++ b/src/edata.c @@ -0,0 +1,6 @@ +#include 
"jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, + extent_esnead_comp) +ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) diff --git a/src/extent.c b/src/extent.c deleted file mode 100644 index 1a5a1fa..0000000 --- a/src/extent.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, - extent_esnead_comp) -ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -- cgit v0.12 From a7862df6169f27d9f347343ffef2bef3e167317c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Dec 2019 14:36:45 -0800 Subject: Rename extent_t to edata_t. This frees us up from the unfortunate extent/extent2 naming collision. --- include/jemalloc/internal/arena_externs.h | 14 +- include/jemalloc/internal/arena_inlines_b.h | 86 +-- include/jemalloc/internal/arena_stats.h | 4 +- include/jemalloc/internal/arena_structs.h | 14 +- include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 4 +- include/jemalloc/internal/bin.h | 6 +- include/jemalloc/internal/bin_types.h | 2 +- include/jemalloc/internal/edata.h | 469 ++++++------ include/jemalloc/internal/eset.h | 10 +- include/jemalloc/internal/extent2.h | 34 +- .../internal/jemalloc_internal_inlines_b.h | 4 +- include/jemalloc/internal/large_externs.h | 16 +- include/jemalloc/internal/rtree.h | 72 +- include/jemalloc/internal/witness.h | 2 +- src/arena.c | 296 ++++---- src/base.c | 76 +- src/bin.c | 4 +- src/ctl.c | 16 +- src/edata.c | 6 +- src/ehooks.c | 4 +- src/eset.c | 68 +- src/extent2.c | 837 ++++++++++----------- src/extent_dss.c | 10 +- src/inspect.c | 38 +- src/large.c | 144 ++-- src/tcache.c | 64 +- test/unit/arena_reset.c | 10 +- test/unit/base.c | 6 +- test/unit/binshard.c | 10 +- test/unit/rtree.c | 72 +- test/unit/slab.c | 10 +- 32 files changed, 1201 insertions(+), 1209 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index b6b33ce..608dda7 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,18 +28,18 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent); + ehooks_t *ehooks, edata_t *edata); #ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, +edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, - extent_t *extent); + edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); + edata_t *edata, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); + edata_t *edata, size_t oldsize); ssize_t arena_dirty_decay_ms_get(arena_t *arena); bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_get(arena_t *arena); @@ 
-64,7 +64,7 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *extent, void *ptr); + szind_t binind, edata_t *edata, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 16da67e..6dacab3 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -9,8 +9,8 @@ #include "jemalloc/internal/ticker.h" static inline arena_t * -arena_get_from_extent(extent_t *extent) { - return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get(extent)], +arena_get_from_edata(edata_t *edata) { + return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); } @@ -42,20 +42,20 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, assert(ptr != NULL); assert(prof_info != NULL); - const extent_t *extent; + const edata_t *edata; bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - extent = iealloc(tsd_tsdn(tsd), ptr); - is_slab = extent_slab_get(extent); + edata = iealloc(tsd_tsdn(tsd), ptr); + is_slab = edata_slab_get(edata); } else if (!unlikely(is_slab = alloc_ctx->slab)) { - extent = iealloc(tsd_tsdn(tsd), ptr); + edata = iealloc(tsd_tsdn(tsd), ptr); } if (unlikely(!is_slab)) { - /* extent must have been initialized at this point. */ - large_prof_info_get(extent, prof_info); + /* edata must have been initialized at this point. */ + large_prof_info_get(edata, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -69,9 +69,9 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { /* Static check. */ if (alloc_ctx == NULL) { - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_reset(extent); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + if (unlikely(!edata_slab_get(edata))) { + large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { @@ -85,10 +85,10 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - assert(!extent_slab_get(extent)); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + assert(!edata_slab_get(edata)); - large_prof_tctx_reset(extent); + large_prof_tctx_reset(edata); } JEMALLOC_ALWAYS_INLINE void @@ -96,9 +96,9 @@ arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - assert(!extent_slab_get(extent)); - large_prof_info_set(extent, tctx); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + assert(!edata_slab_get(edata)); + large_prof_info_set(edata, tctx); } JEMALLOC_ALWAYS_INLINE void @@ -130,9 +130,9 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. 
*/ JEMALLOC_ALWAYS_INLINE void arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + edata_t *edata) { + size_t extent_size = edata_size_get(edata); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ arena_stats_lock(tsdn, &arena->stats); @@ -169,7 +169,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get( + return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get( iealloc(tsdn, ptr))], ATOMIC_RELAXED); } @@ -201,19 +201,19 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent; + edata_t *edata; szind_t szind; - if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, false, &extent, &szind)) { + if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &edata, &szind)) { return 0; } - if (extent == NULL) { + if (edata == NULL) { return 0; } - assert(extent_state_get(extent) == extent_state_active); + assert(edata_state_get(edata) == extent_state_active); /* Only slab members should be looked up via interior pointers. */ - assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); + assert(edata_addr_get(edata) == ptr || edata_slab_get(edata)); assert(szind != SC_NSIZES); @@ -225,8 +225,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + edata_t *edata = iealloc(tsdn, ptr); + large_dalloc(tsdn, edata); } } @@ -243,11 +243,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { true, &szind, &slab); if (config_debug) { - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); + assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); - assert(slab == extent_slab_get(extent)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { @@ -269,8 +269,8 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + edata_t *edata = iealloc(tsdn, ptr); + large_dalloc(tsdn, edata); } } @@ -300,11 +300,11 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (config_debug) { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); + assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); - assert(slab == extent_slab_get(extent)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { @@ -344,10 +344,10 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); if (config_debug) { - extent_t *extent = rtree_extent_read(tsdn, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - 
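Several of the paths above share one shape: trust a caller-supplied alloc_ctx (szind/slab) when one is available, fall back to an rtree lookup when it is not, and cross-check the cached values against the rtree under config_debug. A reduced, standalone sketch of that pattern, with a hypothetical demo_lookup() standing in for the rtree read (illustration only, not part of the patch):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct {
	unsigned szind;
	bool slab;
} demo_alloc_ctx_t;

/* Stand-in for the authoritative rtree lookup. */
static demo_alloc_ctx_t
demo_lookup(const void *ptr) {
	(void)ptr;
	demo_alloc_ctx_t ctx = {3, true};
	return ctx;
}

static unsigned
demo_szind_get(const void *ptr, const demo_alloc_ctx_t *cached) {
	if (cached == NULL) {
		/* Slow path: no cached metadata, so do the lookup. */
		return demo_lookup(ptr).szind;
	}
	/* Debug cross-checks, mirroring the config_debug blocks above. */
	assert(cached->szind == demo_lookup(ptr).szind);
	assert(cached->slab == demo_lookup(ptr).slab);
	return cached->szind;
}

int
main(void) {
	int x;
	demo_alloc_ctx_t ctx = demo_lookup(&x);
	assert(demo_szind_get(&x, NULL) == demo_szind_get(&x, &ctx));
	return 0;
}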
assert(szind == extent_szind_get(extent)); - assert(slab == extent_slab_get(extent)); + assert(szind == edata_szind_get(edata)); + assert(slab == edata_slab_get(edata)); } } @@ -401,10 +401,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); - extent_t *extent = rtree_extent_read(tsdn, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); - assert(slab == extent_slab_get(extent)); + assert(szind == edata_szind_get(edata)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 23949ed..4166705 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -94,8 +94,8 @@ struct arena_stats_s { */ atomic_zu_t retained; /* Derived. */ - /* Number of extent_t structs allocated by base, but not being used. */ - atomic_zu_t extent_avail; + /* Number of edata_t structs allocated by base, but not being used. */ + atomic_zu_t edata_avail; arena_stats_decay_t decay_dirty; arena_stats_decay_t decay_muzzy; diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index bc8c039..aac620b 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -144,7 +144,7 @@ struct arena_s { * * Synchronization: large_mtx. */ - extent_list_t large; + edata_list_t large; /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; @@ -185,14 +185,14 @@ struct arena_s { malloc_mutex_t extent_grow_mtx; /* - * Available extent structures that were allocated via - * base_alloc_extent(). + * Available edata structures that were allocated via + * base_alloc_edata(). * - * Synchronization: extent_avail_mtx. + * Synchronization: edata_avail_mtx. */ - extent_tree_t extent_avail; - atomic_zu_t extent_avail_cnt; - malloc_mutex_t extent_avail_mtx; + edata_tree_t edata_avail; + atomic_zu_t edata_avail_cnt; + malloc_mutex_t edata_avail_mtx; /* * bins is used to store heaps of free regions. diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 35734c3..2f24131 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -11,7 +11,7 @@ ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); +edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 1097892..fb7e68a 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -16,7 +16,7 @@ struct base_block_s { base_block_t *next; /* Tracks unused trailing space. */ - extent_t extent; + edata_t edata; }; struct base_s { @@ -47,7 +47,7 @@ struct base_s { base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. 
*/ - extent_heap_t avail[SC_NSIZES]; + edata_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. */ size_t allocated; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 8cc7fed..9a774e9 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -22,17 +22,17 @@ struct bin_s { * slabcur is reassigned, the previous slab must be deallocated or * inserted into slabs_{nonfull,full}. */ - extent_t *slabcur; + edata_t *slabcur; /* * Heap of non-full slabs. This heap is used to assure that new * allocations come from the non-full slab that is oldest/lowest in * memory. */ - extent_heap_t slabs_nonfull; + edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - extent_list_t slabs_full; + edata_list_t slabs_full; /* Bin statistics. */ bin_stats_t stats; diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 3533606..945e832 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/sc.h" -#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) +#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) #define N_BIN_SHARDS_DEFAULT 1 /* Used in TSD static initializer only. Real init in arena_bind(). */ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2fd6e90..990c325 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_H -#define JEMALLOC_INTERNAL_EXTENT_H +#ifndef JEMALLOC_INTERNAL_EDATA_H +#define JEMALLOC_INTERNAL_EDATA_H #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" @@ -26,11 +26,11 @@ enum extent_head_state_e { typedef enum extent_head_state_e extent_head_state_t; /* Extent (span of pages). Use accessor functions for e_* fields. 
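The bin layout above -- a current slab plus a non-full heap and a full list -- is easier to follow in a toy form. The sketch below (illustration only, not part of the patch) replaces the pairing heap with a plain singly linked list and uses hypothetical demo_* names; in jemalloc, slabs_nonfull is ordered so that allocation prefers the oldest/lowest slab.

#include <stdbool.h>
#include <stddef.h>

typedef struct demo_slab_s demo_slab_t;
struct demo_slab_s {
	unsigned nfree;		/* free regions left in this slab */
	demo_slab_t *next;
};

typedef struct {
	demo_slab_t *slabcur;		/* slab currently being carved up */
	demo_slab_t *slabs_nonfull;	/* slabs with free regions (a heap in jemalloc) */
	demo_slab_t *slabs_full;	/* completely allocated slabs */
} demo_bin_t;

static demo_slab_t *
demo_pop(demo_slab_t **list) {
	demo_slab_t *s = *list;
	if (s != NULL) {
		*list = s->next;
	}
	return s;
}

static void
demo_push(demo_slab_t **list, demo_slab_t *s) {
	s->next = *list;
	*list = s;
}

/* Take one region out of the bin; returns false if no slab has room. */
static bool
demo_bin_alloc(demo_bin_t *bin) {
	if (bin->slabcur == NULL || bin->slabcur->nfree == 0) {
		if (bin->slabcur != NULL) {
			demo_push(&bin->slabs_full, bin->slabcur);
		}
		bin->slabcur = demo_pop(&bin->slabs_nonfull);
		if (bin->slabcur == NULL) {
			return false;	/* caller would allocate a new slab */
		}
	}
	bin->slabcur->nfree--;
	return true;
}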
*/ -typedef struct extent_s extent_t; -typedef ql_head(extent_t) extent_list_t; -typedef ph(extent_t) extent_tree_t; -typedef ph(extent_t) extent_heap_t; -struct extent_s { +typedef struct edata_s edata_t; +typedef ql_head(edata_t) edata_list_t; +typedef ph(edata_t) edata_tree_t; +typedef ph(edata_t) edata_heap_t; +struct edata_s { /* * Bitfield containing several fields: * @@ -105,48 +105,48 @@ struct extent_s { uint64_t e_bits; #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) -#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) +#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EDATA_BITS_ARENA_SHIFT 0 +#define EDATA_BITS_ARENA_MASK MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_WIDTH 1 -#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) +#define EDATA_BITS_SLAB_WIDTH 1 +#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_WIDTH 1 -#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_COMMITTED_WIDTH 1 +#define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_WIDTH 1 -#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_DUMPABLE_WIDTH 1 +#define EDATA_BITS_DUMPABLE_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_DUMPABLE_MASK MASK(EDATA_BITS_DUMPABLE_WIDTH, EDATA_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_WIDTH 1 -#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) +#define EDATA_BITS_ZEROED_WIDTH 1 +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_DUMPABLE_WIDTH + EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_WIDTH 2 -#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) +#define EDATA_BITS_STATE_WIDTH 2 +#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) +#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_SZIND_MASK MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define 
EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) +#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_NFREE_MASK MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_WIDTH 6 -#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_BINSHARD_WIDTH 6 +#define EDATA_BITS_BINSHARD_SHIFT (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_BINSHARD_MASK MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_WIDTH 1 -#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_IS_HEAD_WIDTH 1 +#define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define EDATA_BITS_SN_SHIFT (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_SN_MASK (UINT64_MAX << EDATA_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -160,8 +160,8 @@ struct extent_s { * ssssssss [...] ssssssss ssssnnnn nnnnnnnn */ size_t e_size_esn; - #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) - #define EXTENT_ESN_MASK ((size_t)PAGE-1) + #define EDATA_SIZE_MASK ((size_t)~(PAGE-1)) + #define EDATA_ESN_MASK ((size_t)PAGE-1) /* Base extent size, which may not be a multiple of PAGE. */ size_t e_bsize; }; @@ -173,13 +173,13 @@ struct extent_s { * - stashed dirty extents * - arena's large allocations */ - ql_elm(extent_t) ql_link; + ql_elm(edata_t) ql_link; /* * Linkage for per size class sn/address-ordered heaps, and * for extent_avail */ - phn(extent_t) ph_link; + phn(edata_t) ph_link; union { /* Small region slab metadata. */ @@ -196,398 +196,397 @@ struct extent_s { }; static inline unsigned -extent_arena_ind_get(const extent_t *extent) { - unsigned arena_ind = (unsigned)((extent->e_bits & - EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); +edata_arena_ind_get(const edata_t *edata) { + unsigned arena_ind = (unsigned)((edata->e_bits & + EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT); assert(arena_ind < MALLOCX_ARENA_LIMIT); return arena_ind; } static inline szind_t -extent_szind_get_maybe_invalid(const extent_t *extent) { - szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> - EXTENT_BITS_SZIND_SHIFT); +edata_szind_get_maybe_invalid(const edata_t *edata) { + szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >> + EDATA_BITS_SZIND_SHIFT); assert(szind <= SC_NSIZES); return szind; } static inline szind_t -extent_szind_get(const extent_t *extent) { - szind_t szind = extent_szind_get_maybe_invalid(extent); +edata_szind_get(const edata_t *edata) { + szind_t szind = edata_szind_get_maybe_invalid(edata); assert(szind < SC_NSIZES); /* Never call when "invalid". 
*/ return szind; } static inline size_t -extent_usize_get(const extent_t *extent) { - return sz_index2size(extent_szind_get(extent)); +edata_usize_get(const edata_t *edata) { + return sz_index2size(edata_szind_get(edata)); } static inline unsigned -extent_binshard_get(const extent_t *extent) { - unsigned binshard = (unsigned)((extent->e_bits & - EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); +edata_binshard_get(const edata_t *edata) { + unsigned binshard = (unsigned)((edata->e_bits & + EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); return binshard; } static inline size_t -extent_sn_get(const extent_t *extent) { - return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> - EXTENT_BITS_SN_SHIFT); +edata_sn_get(const edata_t *edata) { + return (size_t)((edata->e_bits & EDATA_BITS_SN_MASK) >> + EDATA_BITS_SN_SHIFT); } static inline extent_state_t -extent_state_get(const extent_t *extent) { - return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> - EXTENT_BITS_STATE_SHIFT); +edata_state_get(const edata_t *edata) { + return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >> + EDATA_BITS_STATE_SHIFT); } static inline bool -extent_zeroed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> - EXTENT_BITS_ZEROED_SHIFT); +edata_zeroed_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> + EDATA_BITS_ZEROED_SHIFT); } static inline bool -extent_committed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> - EXTENT_BITS_COMMITTED_SHIFT); +edata_committed_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >> + EDATA_BITS_COMMITTED_SHIFT); } static inline bool -extent_dumpable_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> - EXTENT_BITS_DUMPABLE_SHIFT); +edata_dumpable_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_DUMPABLE_MASK) >> + EDATA_BITS_DUMPABLE_SHIFT); } static inline bool -extent_slab_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> - EXTENT_BITS_SLAB_SHIFT); +edata_slab_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> + EDATA_BITS_SLAB_SHIFT); } static inline unsigned -extent_nfree_get(const extent_t *extent) { - assert(extent_slab_get(extent)); - return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> - EXTENT_BITS_NFREE_SHIFT); +edata_nfree_get(const edata_t *edata) { + assert(edata_slab_get(edata)); + return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >> + EDATA_BITS_NFREE_SHIFT); } static inline void * -extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return PAGE_ADDR2BASE(extent->e_addr); +edata_base_get(const edata_t *edata) { + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || + !edata_slab_get(edata)); + return PAGE_ADDR2BASE(edata->e_addr); } static inline void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return extent->e_addr; +edata_addr_get(const edata_t *edata) { + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || + !edata_slab_get(edata)); + return edata->e_addr; } static inline size_t 
-extent_size_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_SIZE_MASK); +edata_size_get(const edata_t *edata) { + return (edata->e_size_esn & EDATA_SIZE_MASK); } static inline size_t -extent_esn_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_ESN_MASK); +edata_esn_get(const edata_t *edata) { + return (edata->e_size_esn & EDATA_ESN_MASK); } static inline size_t -extent_bsize_get(const extent_t *extent) { - return extent->e_bsize; +edata_bsize_get(const edata_t *edata) { + return edata->e_bsize; } static inline void * -extent_before_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +edata_before_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) - PAGE); } static inline void * -extent_last_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE); +edata_last_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) + + edata_size_get(edata) - PAGE); } static inline void * -extent_past_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent)); +edata_past_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) + + edata_size_get(edata)); } static inline slab_data_t * -extent_slab_data_get(extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; +edata_slab_data_get(edata_t *edata) { + assert(edata_slab_get(edata)); + return &edata->e_slab_data; } static inline const slab_data_t * -extent_slab_data_get_const(const extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; +edata_slab_data_get_const(const edata_t *edata) { + assert(edata_slab_get(edata)); + return &edata->e_slab_data; } static inline void -extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { +edata_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { assert(prof_info != NULL); prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( - &extent->e_prof_tctx, ATOMIC_ACQUIRE); - prof_info->alloc_time = extent->e_alloc_time; + &edata->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = edata->e_alloc_time; } static inline void -extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +edata_arena_ind_set(edata_t *edata, unsigned arena_ind) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); } static inline void -extent_binshard_set(extent_t *extent, unsigned binshard) { +edata_binshard_set(edata_t *edata, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); } static inline void -extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; +edata_addr_set(edata_t *edata, void *addr) { + edata->e_addr = addr; } static inline void -extent_size_set(extent_t *extent, size_t size) { - assert((size & ~EXTENT_SIZE_MASK) == 0); - extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +edata_size_set(edata_t *edata, size_t size) { + assert((size & ~EDATA_SIZE_MASK) == 0); + edata->e_size_esn = size | (edata->e_size_esn & ~EDATA_SIZE_MASK); } static inline void -extent_esn_set(extent_t *extent, size_t esn) { - extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & - EXTENT_ESN_MASK); +edata_esn_set(edata_t *edata, size_t esn) { + edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn & + EDATA_ESN_MASK); } static inline void -extent_bsize_set(extent_t *extent, size_t bsize) { - extent->e_bsize = bsize; +edata_bsize_set(edata_t *edata, size_t bsize) { + edata->e_bsize = bsize; } static inline void -extent_szind_set(extent_t *extent, szind_t szind) { +edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | - ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) | + ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); } static inline void -extent_nfree_set(extent_t *extent, unsigned nfree) { - assert(extent_slab_get(extent)); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_set(edata_t *edata, unsigned nfree) { + assert(edata_slab_get(edata)); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) | + ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { +edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & - (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); + edata->e_bits = (edata->e_bits & + (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_inc(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_inc(edata_t *edata) { + assert(edata_slab_get(edata)); + edata->e_bits += ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_dec(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_dec(edata_t *edata) { + assert(edata_slab_get(edata)); + edata->e_bits -= ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_sub(extent_t *extent, uint64_t n) { - assert(extent_slab_get(extent)); - extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_sub(edata_t *edata, uint64_t n) { + assert(edata_slab_get(edata)); + edata->e_bits -= (n << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_sn_set(extent_t *extent, size_t sn) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | - ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +edata_sn_set(edata_t *edata, size_t sn) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SN_MASK) | + ((uint64_t)sn << EDATA_BITS_SN_SHIFT); } static inline void -extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | - ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +edata_state_set(edata_t *edata, extent_state_t state) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) | + ((uint64_t)state << EDATA_BITS_STATE_SHIFT); } static inline void -extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +edata_zeroed_set(edata_t *edata, bool zeroed) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); } static inline void -extent_committed_set(extent_t *extent, bool committed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +edata_committed_set(edata_t *edata, bool committed) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); } static inline void -extent_dumpable_set(extent_t *extent, bool dumpable) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +edata_dumpable_set(edata_t *edata, bool dumpable) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EDATA_BITS_DUMPABLE_SHIFT); } static inline void -extent_slab_set(extent_t *extent, bool slab) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | - ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +edata_slab_set(edata_t *edata, bool slab) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | + ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); } static inline void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t 
*tctx) { - atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { + atomic_store_p(&edata->e_prof_tctx, tctx, ATOMIC_RELEASE); } static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { - nstime_copy(&extent->e_alloc_time, t); +edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { + nstime_copy(&edata->e_alloc_time, t); } static inline bool -extent_is_head_get(extent_t *extent) { +edata_is_head_get(edata_t *edata) { if (maps_coalesce) { not_reached(); } - return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> - EXTENT_BITS_IS_HEAD_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> + EDATA_BITS_IS_HEAD_SHIFT); } static inline void -extent_is_head_set(extent_t *extent, bool is_head) { +edata_is_head_set(edata_t *edata, bool is_head) { if (maps_coalesce) { not_reached(); } - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } static inline void -extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, +edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - extent_arena_ind_set(extent, arena_ind); - extent_addr_set(extent, addr); - extent_size_set(extent, size); - extent_slab_set(extent, slab); - extent_szind_set(extent, szind); - extent_sn_set(extent, sn); - extent_state_set(extent, state); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_dumpable_set(extent, dumpable); - ql_elm_new(extent, ql_link); + edata_arena_ind_set(edata, arena_ind); + edata_addr_set(edata, addr); + edata_size_set(edata, size); + edata_slab_set(edata, slab); + edata_szind_set(edata, szind); + edata_sn_set(edata, sn); + edata_state_set(edata, state); + edata_zeroed_set(edata, zeroed); + edata_committed_set(edata, committed); + edata_dumpable_set(edata, dumpable); + ql_elm_new(edata, ql_link); if (!maps_coalesce) { - extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : - false); + edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); } if (config_prof) { - extent_prof_tctx_set(extent, NULL); + edata_prof_tctx_set(edata, NULL); } } static inline void -extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); - extent_addr_set(extent, addr); - extent_bsize_set(extent, bsize); - extent_slab_set(extent, false); - extent_szind_set(extent, SC_NSIZES); - extent_sn_set(extent, sn); - extent_state_set(extent, extent_state_active); - extent_zeroed_set(extent, true); - extent_committed_set(extent, true); - extent_dumpable_set(extent, true); +edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { + edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); + edata_addr_set(edata, addr); + edata_bsize_set(edata, bsize); + edata_slab_set(edata, false); + edata_szind_set(edata, SC_NSIZES); + edata_sn_set(edata, sn); + edata_state_set(edata, extent_state_active); + edata_zeroed_set(edata, true); + edata_committed_set(edata, true); + edata_dumpable_set(edata, true); } static inline void -extent_list_init(extent_list_t *list) { +edata_list_init(edata_list_t *list) { ql_new(list); } -static inline extent_t * -extent_list_first(const extent_list_t *list) { +static inline edata_t * +edata_list_first(const edata_list_t *list) { return ql_first(list); } -static inline extent_t * -extent_list_last(const extent_list_t *list) { +static inline edata_t * +edata_list_last(const edata_list_t *list) { return ql_last(list, ql_link); } static inline void -extent_list_append(extent_list_t *list, extent_t *extent) { - ql_tail_insert(list, extent, ql_link); +edata_list_append(edata_list_t *list, edata_t *edata) { + ql_tail_insert(list, edata, ql_link); } static inline void -extent_list_prepend(extent_list_t *list, extent_t *extent) { - ql_head_insert(list, extent, ql_link); +edata_list_prepend(edata_list_t *list, edata_t *edata) { + ql_head_insert(list, edata, ql_link); } static inline void -extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert) { +edata_list_replace(edata_list_t *list, edata_t *to_remove, + edata_t *to_insert) { ql_after_insert(to_remove, to_insert, ql_link); ql_remove(list, to_remove, ql_link); } static inline void -extent_list_remove(extent_list_t *list, extent_t *extent) { - ql_remove(list, extent, ql_link); +edata_list_remove(edata_list_t *list, edata_t *edata) { + ql_remove(list, edata, ql_link); } static inline int -extent_sn_comp(const extent_t *a, const extent_t *b) { - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); +edata_sn_comp(const edata_t *a, const edata_t *b) { + size_t a_sn = edata_sn_get(a); + size_t b_sn = edata_sn_get(b); return (a_sn > b_sn) - (a_sn < b_sn); } static inline int -extent_esn_comp(const extent_t *a, const extent_t *b) { - size_t a_esn = extent_esn_get(a); - size_t b_esn = extent_esn_get(b); +edata_esn_comp(const edata_t *a, const edata_t *b) { + size_t a_esn = edata_esn_get(a); + size_t b_esn = edata_esn_get(b); return (a_esn > b_esn) - (a_esn < b_esn); } static inline int -extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); +edata_ad_comp(const edata_t *a, const edata_t *b) { + uintptr_t a_addr = (uintptr_t)edata_addr_get(a); + uintptr_t b_addr = (uintptr_t)edata_addr_get(b); return (a_addr > b_addr) - (a_addr < b_addr); } static inline int -extent_ead_comp(const extent_t *a, const extent_t 
*b) { +edata_ead_comp(const edata_t *a, const edata_t *b) { uintptr_t a_eaddr = (uintptr_t)a; uintptr_t b_eaddr = (uintptr_t)b; @@ -595,32 +594,32 @@ extent_ead_comp(const extent_t *a, const extent_t *b) { } static inline int -extent_snad_comp(const extent_t *a, const extent_t *b) { +edata_snad_comp(const edata_t *a, const edata_t *b) { int ret; - ret = extent_sn_comp(a, b); + ret = edata_sn_comp(a, b); if (ret != 0) { return ret; } - ret = extent_ad_comp(a, b); + ret = edata_ad_comp(a, b); return ret; } static inline int -extent_esnead_comp(const extent_t *a, const extent_t *b) { +edata_esnead_comp(const edata_t *a, const edata_t *b) { int ret; - ret = extent_esn_comp(a, b); + ret = edata_esn_comp(a, b); if (ret != 0) { return ret; } - ret = extent_ead_comp(a, b); + ret = edata_ead_comp(a, b); return ret; } -ph_proto(, extent_avail_, extent_tree_t, extent_t) -ph_proto(, extent_heap_, extent_heap_t, extent_t) +ph_proto(, edata_avail_, edata_tree_t, edata_t) +ph_proto(, edata_heap_, edata_heap_t, edata_t) -#endif /* JEMALLOC_INTERNAL_EXTENT_H */ +#endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 833f19c..e76257a 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -19,7 +19,7 @@ struct eset_s { * * Synchronization: mtx. */ - extent_heap_t heaps[SC_NPSIZES + 1]; + edata_heap_t heaps[SC_NPSIZES + 1]; atomic_zu_t nextents[SC_NPSIZES + 1]; atomic_zu_t nbytes[SC_NPSIZES + 1]; @@ -35,7 +35,7 @@ struct eset_s { * * Synchronization: mtx. */ - extent_list_t lru; + edata_list_t lru; /* * Page sum for all extents in heaps. @@ -67,13 +67,13 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. */ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); -void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); -void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); +void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); /* * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. 
*/ -extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, +edata_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment); void eset_prefork(tsdn_t *tsdn, eset_t *eset); diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 7a18a61..ef23267 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,38 +26,38 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +edata_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata); -extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent); -extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, edata_t *edata); +edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, size_t npages_min); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); + edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); + edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t offset, size_t length); +edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b); -bool extent_head_no_merge(extent_t *a, extent_t *b); + edata_t *a, edata_t *b); +bool extent_head_no_merge(edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index d4cb04c..8367ee2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ 
b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -76,12 +76,12 @@ arena_is_auto(arena_t *arena) { return (arena_ind_get(arena) < manual_arena_base); } -JEMALLOC_ALWAYS_INLINE extent_t * +JEMALLOC_ALWAYS_INLINE edata_t * iealloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + return rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); } diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 2299920..fe5e606 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -6,7 +6,7 @@ void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, size_t usize_max, bool zero); void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, @@ -18,12 +18,12 @@ extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; typedef void (large_dalloc_maybe_junk_t)(void *, size_t); extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; -void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); -void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); -void large_dalloc(tsdn_t *tsdn, extent_t *extent); -size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); -void large_prof_tctx_reset(extent_t *extent); -void large_prof_info_set(extent_t *extent, prof_tctx_t *tctx); +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); +void large_dalloc(tsdn_t *tsdn, edata_t *edata); +size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); +void large_prof_info_get(const edata_t *edata, prof_info_t *prof_info); +void large_prof_tctx_reset(edata_t *edata); +void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 16ccbeb..339c7e5 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -48,18 +48,18 @@ struct rtree_leaf_elm_s { /* * Single pointer-width field containing all three leaf element fields. * For example, on a 64-bit x64 system with 48 significant virtual - * memory address bits, the index, extent, and slab fields are packed as + * memory address bits, the index, edata, and slab fields are packed as * such: * * x: index - * e: extent + * e: edata * b: slab * * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b */ atomic_p_t le_bits; #else - atomic_p_t le_extent; /* (extent_t *) */ + atomic_p_t le_edata; /* (edata_t *) */ atomic_u_t le_szind; /* (szind_t) */ atomic_b_t le_slab; /* (bool) */ #endif @@ -176,8 +176,8 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_leaf_elm_bits_edata_get(uintptr_t bits) { # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -187,10 +187,10 @@ rtree_leaf_elm_bits_extent_get(uintptr_t bits) { /* Mask off the slab bit. */ uintptr_t low_bit_mask = ~(uintptr_t)1; uintptr_t mask = high_bit_mask & low_bit_mask; - return (extent_t *)(bits & mask); + return (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask slab bit. */ - return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + return (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & ~((uintptr_t)0x1)); # endif } @@ -207,16 +207,16 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_extent_get(bits); + return rtree_leaf_elm_bits_edata_get(bits); #else - extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); - return extent; + return edata; #endif } @@ -245,16 +245,16 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, extent_t *extent) { +rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, edata_t *edata) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + LG_VADDR) | ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); + atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); #endif } @@ -267,7 +267,7 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); @@ -283,7 +283,7 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else @@ -293,20 +293,20 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) { + 
rtree_leaf_elm_t *elm, edata_t *edata, szind_t szind, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); /* - * Write extent last, since the element is atomically considered valid - * as soon as the extent field is non-NULL. + * Write edata last, since the element is atomically considered valid + * as soon as the edata field is non-NULL. */ - rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); + rtree_leaf_elm_edata_write(tsdn, rtree, elm, edata); #endif } @@ -317,7 +317,7 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, /* * The caller implicitly assures that it is the only writer to the szind - * and slab fields, and that the extent field cannot currently change. + * and slab fields, and that the edata field cannot currently change. */ rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); @@ -384,9 +384,9 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - extent_t *extent, szind_t szind, bool slab) { - /* Use rtree_clear() to set the extent to NULL. */ - assert(extent != NULL); + edata_t *edata, szind_t szind, bool slab) { + /* Use rtree_clear() to set the edata to NULL. */ + assert(edata != NULL); rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); @@ -394,8 +394,8 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return true; } - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); + assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, edata, szind, slab); return false; } @@ -412,15 +412,15 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return elm; } -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return NULL; } - return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); } JEMALLOC_ALWAYS_INLINE szind_t @@ -440,14 +440,14 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, */ JEMALLOC_ALWAYS_INLINE bool -rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { +rtree_edata_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, edata_t **r_edata, szind_t *r_szind) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return true; } - *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, 
dependent); return false; } @@ -520,7 +520,7 @@ static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != + assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != NULL); rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d76b790..ddbcf9d 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,7 +43,7 @@ #define WITNESS_RANK_TCACHE_QL 13U #define WITNESS_RANK_EXTENT_GROW 14U #define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EXTENT_AVAIL 16U +#define WITNESS_RANK_EDATA_AVAIL 16U #define WITNESS_RANK_EXTENT_POOL 17U #define WITNESS_RANK_RTREE 18U diff --git a/src/arena.c b/src/arena.c index 2d46b9e..f05a1d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -60,9 +60,9 @@ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); /******************************************************************************/ @@ -102,8 +102,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->retained, eset_npages_get(&arena->eset_retained) << LG_PAGE); - atomic_store_zu(&astats->extent_avail, - atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), + atomic_store_zu(&astats->edata_avail, + atomic_load_zu(&arena->edata_avail_cnt, ATOMIC_RELAXED), ATOMIC_RELAXED); arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -224,7 +224,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, + READ_ARENA_MUTEX_PROF_DATA(edata_avail_mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -254,11 +254,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { + edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, extent); + extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -267,34 +267,34 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static void * -arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { +arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { void *ret; - slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); size_t regind; - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)extent_addr_get(slab) + + ret = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - extent_nfree_dec(slab); + edata_nfree_dec(slab); return ret; } static void -arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, +arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { - slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); - assert(extent_nfree_get(slab) >= cnt); + assert(edata_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); #if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { size_t regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) + + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); } #else @@ -315,7 +315,7 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, * Load from memory locations only once, outside the * hot loop below. */ - uintptr_t base = (uintptr_t)extent_addr_get(slab); + uintptr_t base = (uintptr_t)edata_addr_get(slab); uintptr_t regsize = (uintptr_t)bin_info->reg_size; while (pop--) { size_t bit = cfs_lu(&g); @@ -327,24 +327,24 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, slab_data->bitmap[group] = g; } #endif - extent_nfree_sub(slab, cnt); + edata_nfree_sub(slab, cnt); } #ifndef JEMALLOC_JET static #endif size_t -arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { +arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr) { size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. 
*/ - assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % (uintptr_t)bin_infos[binind].reg_size == 0); - diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); /* Avoid doing division with a variable divisor. */ regind = div_compute(&arena_binind_div_info[binind], diff); @@ -355,17 +355,17 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { } static void -arena_slab_reg_dalloc(extent_t *slab, slab_data_t *slab_data, void *ptr) { - szind_t binind = extent_szind_get(slab); +arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); - assert(extent_nfree_get(slab) < bin_info->nregs); + assert(edata_nfree_get(slab) < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - extent_nfree_inc(slab); + edata_nfree_inc(slab); } static void @@ -423,7 +423,7 @@ arena_may_have_muzzy(arena_t *arena) { return arena_muzzy_decay_ms_get(arena) != 0; } -extent_t * +edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { ehooks_t *ehooks = arena_get_ehooks(arena); @@ -434,23 +434,22 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, ehooks, - &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, - szind, zero, &commit); - if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + edata_t *edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + if (edata == NULL && arena_may_have_muzzy(arena)) { + edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; - if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, + if (edata == NULL) { + edata = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { /* - * extent may be NULL on OOM, but in that case - * mapped_add isn't used below, so there's no need to - * conditionally set it to 0 here. + * edata may be NULL on OOM, but in that case mapped_add + * isn't used below, so there's no need to conditionally + * set it to 0 here. 
*/ mapped_add = size; } @@ -458,7 +457,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, mapped_add = 0; } - if (extent != NULL) { + if (edata != NULL) { if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_malloc_stats_update(tsdn, arena, usize); @@ -471,24 +470,24 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, size >> LG_PAGE); } - return extent; + return edata; } void -arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_dalloc_stats_update(tsdn, arena, - extent_usize_get(extent)); + edata_usize_get(edata)); arena_stats_unlock(tsdn, &arena->stats); } - arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); + arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } void -arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); + size_t usize = edata_usize_get(edata); size_t udiff = oldusize - usize; if (config_stats) { @@ -500,9 +499,9 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } void -arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); + size_t usize = edata_usize_get(edata); size_t udiff = usize - oldusize; if (config_stats) { @@ -819,25 +818,25 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, size_t npages_limit, - size_t npages_decay_max, extent_list_t *decay_extents) { + size_t npages_decay_max, edata_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. 
*/ size_t nstashed = 0; - extent_t *extent; + edata_t *edata; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + (edata = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) != NULL) { - extent_list_append(decay_extents, extent); - nstashed += extent_size_get(extent) >> LG_PAGE; + edata_list_append(decay_extents, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; } static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, + arena_decay_t *decay, eset_t *eset, bool all, edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -849,31 +848,30 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, npurged = 0; ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - for (extent_t *extent = extent_list_first(decay_extents); extent != - NULL; extent = extent_list_first(decay_extents)) { + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; edata = edata_list_first(decay_extents)) { if (config_stats) { nmadvise++; } - size_t npages = extent_size_get(extent) >> LG_PAGE; + size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; - extent_list_remove(decay_extents, extent); + edata_list_remove(decay_extents, edata); switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - ehooks, extent, 0, - extent_size_get(extent))) { + ehooks, edata, 0, edata_size_get(edata))) { extents_dalloc(tsdn, arena, ehooks, - &arena->eset_muzzy, extent); + &arena->eset_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { nunmapped += npages; } @@ -923,8 +921,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, ehooks_t *ehooks = arena_get_ehooks(arena); - extent_list_t decay_extents; - extent_list_init(&decay_extents); + edata_list_t decay_extents; + edata_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, npages_limit, npages_decay_max, &decay_extents); @@ -1000,33 +998,33 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { } static void -arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); +arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { + arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); ehooks_t *ehooks = arena_get_ehooks(arena); arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); } static void -arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) > 0); - extent_heap_insert(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) > 0); + edata_heap_insert(&bin->slabs_nonfull, slab); if (config_stats) { bin->stats.nonfull_slabs++; } } static void -arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { - extent_heap_remove(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { + edata_heap_remove(&bin->slabs_nonfull, slab); if (config_stats) { 
bin->stats.nonfull_slabs--; } } -static extent_t * +static edata_t * arena_bin_slabs_nonfull_tryget(bin_t *bin) { - extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); + edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; } @@ -1038,30 +1036,30 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) == 0); +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed - * for auto arenas. Bypass this step to avoid touching the extent + * for auto arenas. Bypass this step to avoid touching the edata * linkage (often results in cache misses) for auto arenas. */ if (arena_is_auto(arena)) { return; } - extent_list_append(&bin->slabs_full, slab); + edata_list_append(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - extent_list_remove(&bin->slabs_full, slab); + edata_list_remove(&bin->slabs_full, slab); } static void arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { - extent_t *slab; + edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { @@ -1071,13 +1069,13 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) { + while ((slab = edata_heap_remove_first(&bin->slabs_nonfull)) != NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = extent_list_first(&bin->slabs_full); slab != NULL; - slab = extent_list_first(&bin->slabs_full)) { + for (slab = edata_list_first(&bin->slabs_full); slab != NULL; + slab = edata_list_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -1109,9 +1107,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (extent_t *extent = extent_list_first(&arena->large); extent != - NULL; extent = extent_list_first(&arena->large)) { - void *ptr = extent_base_get(extent); + for (edata_t *edata = edata_list_first(&arena->large); edata != + NULL; edata = edata_list_first(&arena->large)) { + void *ptr = edata_base_get(edata); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1129,7 +1127,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - large_dalloc(tsd_tsdn(tsd), extent); + large_dalloc(tsd_tsdn(tsd), edata); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1157,10 +1155,10 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * dss-based extents for later reuse. 
*/ ehooks_t *ehooks = arena_get_ehooks(arena); - extent_t *extent; - while ((extent = extents_evict(tsdn, arena, ehooks, + edata_t *edata; + while ((edata = extents_evict(tsdn, arena, ehooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, ehooks, extent); + extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1200,10 +1198,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { base_delete(tsd_tsdn(tsd), arena->base); } -static extent_t * +static edata_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const bin_info_t *bin_info, szind_t szind) { - extent_t *slab; + edata_t *slab; bool zero, commit; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1222,7 +1220,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return slab; } -static extent_t * +static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1232,7 +1230,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, @@ -1246,22 +1244,22 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard return NULL; } } - assert(extent_slab_get(slab)); + assert(edata_slab_get(slab)); /* Initialize slab internals. */ - slab_data_t *slab_data = extent_slab_data_get(slab); - extent_nfree_binshard_set(slab, bin_info->nregs, binshard); + slab_data_t *slab_data = edata_slab_data_get(slab); + edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); - arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); + arena_nactive_add(arena, edata_size_get(slab) >> LG_PAGE); return slab; } -static extent_t * +static edata_t * arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, unsigned binshard) { - extent_t *slab; + edata_t *slab; const bin_info_t *bin_info; /* Look for a usable slab. */ @@ -1307,14 +1305,14 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (bin->slabcur != NULL) { /* Only attempted when current slab is full. */ - assert(extent_nfree_get(bin->slabcur) == 0); + assert(edata_nfree_get(bin->slabcur) == 0); } const bin_info_t *bin_info = &bin_infos[binind]; - extent_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, + edata_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { - if (extent_nfree_get(bin->slabcur) > 0) { + if (edata_nfree_get(bin->slabcur) > 0) { /* * Another thread updated slabcur while this one ran * without the bin lock in arena_bin_nonfull_slab_get(). @@ -1331,7 +1329,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, * arena_bin_lower_slab() must be called, as if * a region were just deallocated from the slab. 
*/ - if (extent_nfree_get(slab) == bin_info->nregs) { + if (edata_nfree_get(slab) == bin_info->nregs) { arena_dalloc_bin_slab(tsdn, arena, slab, bin); } else { @@ -1350,7 +1348,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, return NULL; } bin->slabcur = slab; - assert(extent_nfree_get(bin->slabcur) > 0); + assert(edata_nfree_get(bin->slabcur) > 0); return arena_slab_reg_alloc(slab, bin_info); } @@ -1386,12 +1384,12 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void **empty_position = cache_bin_empty_position_get(tbin, binind); for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { - extent_t *slab; - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > + edata_t *slab; + if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { unsigned tofill = nfill - i; - cnt = tofill < extent_nfree_get(slab) ? - tofill : extent_nfree_get(slab); + cnt = tofill < edata_nfree_get(slab) ? + tofill : edata_nfree_get(slab); arena_slab_reg_alloc_batch( slab, &bin_infos[binind], cnt, empty_position - nfill + i); @@ -1454,14 +1452,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; bin_t *bin; size_t usize; - extent_t *slab; + edata_t *slab; assert(binind < SC_NBINS); usize = sz_index2size(binind); unsigned binshard; bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { + if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); @@ -1554,11 +1552,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); szind_t szind = sz_size2index(usize); - extent_szind_set(extent, szind); + edata_szind_set(edata, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); @@ -1568,11 +1566,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { } static size_t -arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { +arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_szind_set(extent, SC_NBINS); + edata_szind_set(edata, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, @@ -1589,9 +1587,9 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - extent_t *extent = iealloc(tsdn, ptr); - size_t usize = extent_usize_get(extent); - size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr); + edata_t *edata = iealloc(tsdn, ptr); + size_t usize = edata_usize_get(edata); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { /* * Currently, we only do redzoning for small sampled @@ -1604,17 +1602,17 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { - large_dalloc(tsdn, extent); + large_dalloc(tsdn, edata); 
} } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = extent_szind_get(slab); + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; /* @@ -1631,7 +1629,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { } static void -arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { assert(slab != bin->slabcur); @@ -1646,9 +1644,9 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); /* * Make sure that if bin->slabcur is non-NULL, it refers to the @@ -1656,9 +1654,9 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, * than proactively keeping it pointing at the oldest/lowest non-full * slab. */ - if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { + if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. */ - if (extent_nfree_get(bin->slabcur) > 0) { + if (edata_nfree_get(bin->slabcur) > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); } else { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); @@ -1674,8 +1672,8 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *slab, void *ptr, bool junked) { - slab_data_t *slab_data = extent_slab_data_get(slab); + szind_t binind, edata_t *slab, void *ptr, bool junked) { + slab_data_t *slab_data = edata_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { @@ -1683,7 +1681,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } arena_slab_reg_dalloc(slab, slab_data, ptr); - unsigned nfree = extent_nfree_get(slab); + unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); arena_dalloc_bin_slab(tsdn, arena, slab, bin); @@ -1700,29 +1698,29 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *extent, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + szind_t binind, edata_t *edata, void *ptr) { + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, true); } static void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - szind_t binind = extent_szind_get(extent); - unsigned binshard = extent_binshard_get(extent); +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { + szind_t binind = edata_szind_get(edata); + unsigned binshard = edata_binshard_get(edata); bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, false); malloc_mutex_unlock(tsdn, &bin->lock); } void arena_dalloc_small(tsdn_t 
*tsdn, void *ptr) { - extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = arena_get_from_extent(extent); + edata_t *edata = iealloc(tsdn, ptr); + arena_t *arena = arena_get_from_edata(edata); - arena_dalloc_bin(tsdn, arena, extent, ptr); + arena_dalloc_bin(tsdn, arena, edata, ptr); arena_decay_tick(tsdn, arena); } @@ -1733,7 +1731,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - extent_t *extent = iealloc(tsdn, ptr); + edata_t *edata = iealloc(tsdn, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1756,19 +1754,19 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_t *arena = arena_get_from_extent(extent); + arena_t *arena = arena_get_from_edata(edata); arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { - ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max, zero); } else { ret = true; } done: - assert(extent == iealloc(tsdn, ptr)); - *newsize = extent_usize_get(extent); + assert(edata == iealloc(tsdn, ptr)); + *newsize = edata_usize_get(edata); return ret; } @@ -2006,7 +2004,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); - extent_list_init(&arena->large); + edata_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2055,9 +2053,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - extent_avail_new(&arena->extent_avail); - if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", - WITNESS_RANK_EXTENT_AVAIL, malloc_mutex_rank_exclusive)) { + edata_avail_new(&arena->edata_avail); + if (malloc_mutex_init(&arena->edata_avail_mtx, "edata_avail", + WITNESS_RANK_EDATA_AVAIL, malloc_mutex_rank_exclusive)) { goto label_error; } @@ -2203,7 +2201,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx); + malloc_mutex_prefork(tsdn, &arena->edata_avail_mtx); } void @@ -2237,7 +2235,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->edata_avail_mtx); eset_postfork_parent(tsdn, &arena->eset_dirty); eset_postfork_parent(tsdn, &arena->eset_muzzy); eset_postfork_parent(tsdn, &arena->eset_retained); @@ -2283,7 +2281,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); + malloc_mutex_postfork_child(tsdn, &arena->edata_avail_mtx); eset_postfork_child(tsdn, &arena->eset_dirty); eset_postfork_child(tsdn, &arena->eset_muzzy); eset_postfork_child(tsdn, &arena->eset_retained); diff --git a/src/base.c b/src/base.c index 79736cd..76d7655 100644 --- a/src/base.c +++ b/src/base.c @@ -105,14 +105,14 @@ label_done: } static void -base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, +base_edata_init(size_t *extent_sn_next, edata_t *edata, 
void *addr, size_t size) { size_t sn; sn = *extent_sn_next; (*extent_sn_next)++; - extent_binit(extent, addr, size, sn); + edata_binit(edata, addr, size, sn); } static size_t @@ -158,7 +158,7 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { pages_huge(block, block->size); if (config_stats) { base->n_thp += HUGEPAGE_CEILING(block->size - - extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; + edata_bsize_get(&block->edata)) >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -166,34 +166,34 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { } static void * -base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, +base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, size_t alignment) { void *ret; assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); assert(size == ALIGNMENT_CEILING(size, alignment)); - *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), - alignment) - (uintptr_t)extent_addr_get(extent); - ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size); - assert(extent_bsize_get(extent) >= *gap_size + size); - extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) + - *gap_size + size), extent_bsize_get(extent) - *gap_size - size, - extent_sn_get(extent)); + *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), + alignment) - (uintptr_t)edata_addr_get(edata); + ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size); + assert(edata_bsize_get(edata) >= *gap_size + size); + edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) + + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, + edata_sn_get(edata)); return ret; } static void -base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, +base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, void *addr, size_t size) { - if (extent_bsize_get(extent) > 0) { + if (edata_bsize_get(edata) > 0) { /* * Compute the index for the largest size class that does not * exceed extent's size. */ szind_t index_floor = - sz_size2index(extent_bsize_get(extent) + 1) - 1; - extent_heap_insert(&base->avail[index_floor], extent); + sz_size2index(edata_bsize_get(edata) + 1) - 1; + edata_heap_insert(&base->avail[index_floor], edata); } if (config_stats) { @@ -218,13 +218,13 @@ base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, } static void * -base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, +base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size, size_t alignment) { void *ret; size_t gap_size; - ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(base, extent, gap_size, ret, size); + ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); + base_extent_bump_alloc_post(base, edata, gap_size, ret, size); return ret; } @@ -284,7 +284,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, block->size = block_size; block->next = NULL; assert(block_size >= header_size); - base_extent_init(extent_sn_next, &block->extent, + base_edata_init(extent_sn_next, &block->edata, (void *)((uintptr_t)block + header_size), block_size - header_size); return block; } @@ -293,7 +293,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * Allocate an extent that is at least as large as specified size, with * specified alignment. 
*/ -static extent_t * +static edata_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); @@ -327,7 +327,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - return &block->extent; + return &block->edata; } base_t * @@ -357,7 +357,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t gap_size; size_t base_alignment = CACHELINE; size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); - base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent, + base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); base->ind = ind; ehooks_init(&base->ehooks, extent_hooks); @@ -371,7 +371,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->blocks = block; base->auto_thp_switched = false; for (szind_t i = 0; i < SC_NSIZES; i++) { - extent_heap_new(&base->avail[i]); + edata_heap_new(&base->avail[i]); } if (config_stats) { base->allocated = sizeof(base_block_t); @@ -384,7 +384,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->edata, gap_size, base, base_size); return base; @@ -422,28 +422,28 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, size_t usize = ALIGNMENT_CEILING(size, alignment); size_t asize = usize + alignment - QUANTUM; - extent_t *extent = NULL; + edata_t *edata = NULL; malloc_mutex_lock(tsdn, &base->mtx); for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) { - extent = extent_heap_remove_first(&base->avail[i]); - if (extent != NULL) { + edata = edata_heap_remove_first(&base->avail[i]); + if (edata != NULL) { /* Use existing space. */ break; } } - if (extent == NULL) { + if (edata == NULL) { /* Try to allocate more space. 
*/ - extent = base_extent_alloc(tsdn, base, usize, alignment); + edata = base_extent_alloc(tsdn, base, usize, alignment); } void *ret; - if (extent == NULL) { + if (edata == NULL) { ret = NULL; goto label_return; } - ret = base_extent_bump_alloc(base, extent, usize, alignment); + ret = base_extent_bump_alloc(base, edata, usize, alignment); if (esn != NULL) { - *esn = extent_sn_get(extent); + *esn = edata_sn_get(edata); } label_return: malloc_mutex_unlock(tsdn, &base->mtx); @@ -463,16 +463,16 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { return base_alloc_impl(tsdn, base, size, alignment, NULL); } -extent_t * -base_alloc_extent(tsdn_t *tsdn, base_t *base) { +edata_t * +base_alloc_edata(tsdn_t *tsdn, base_t *base) { size_t esn; - extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t), + edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), CACHELINE, &esn); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - extent_esn_set(extent, esn); - return extent; + edata_esn_set(edata, esn); + return edata; } void diff --git a/src/bin.c b/src/bin.c index d7cbfb5..52de9ff 100644 --- a/src/bin.c +++ b/src/bin.c @@ -45,8 +45,8 @@ bin_init(bin_t *bin) { return true; } bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); + edata_heap_new(&bin->slabs_nonfull); + edata_list_init(&bin->slabs_full); if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } diff --git a/src/ctl.c b/src/ctl.c index 4aa4af8..1e72bf4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -855,8 +855,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.mapped); accum_atomic_zu(&sdstats->astats.retained, &astats->astats.retained); - accum_atomic_zu(&sdstats->astats.extent_avail, - &astats->astats.extent_avail); + accum_atomic_zu(&sdstats->astats.edata_avail, + &astats->astats.edata_avail); } ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, @@ -2603,18 +2603,18 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, int ret; unsigned arena_ind; void *ptr; - extent_t *extent; + edata_t *edata; arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - extent = iealloc(tsd_tsdn(tsd), ptr); - if (extent == NULL) + edata = iealloc(tsd_tsdn(tsd), ptr); + if (edata == NULL) goto label_return; - arena = arena_get_from_extent(extent); + arena = arena_get_from_edata(edata); if (arena == NULL) goto label_return; @@ -2860,7 +2860,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.extent_avail, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, ATOMIC_RELAXED), size_t) @@ -3010,7 +3010,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->extent_avail_mtx); + MUTEX_PROF_RESET(arena->edata_avail_mtx); MUTEX_PROF_RESET(arena->eset_dirty.mtx); MUTEX_PROF_RESET(arena->eset_muzzy.mtx); MUTEX_PROF_RESET(arena->eset_retained.mtx); diff --git a/src/edata.c b/src/edata.c index 1a5a1fa..5e53e99 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, - extent_esnead_comp) -ph_gen(, extent_heap_, extent_heap_t, 
extent_t, ph_link, extent_snad_comp) +ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, + edata_esnead_comp) +ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) diff --git a/src/ehooks.c b/src/ehooks.c index 25aef1c..a62586b 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -200,8 +200,8 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { if (!maps_coalesce) { tsdn_t *tsdn = tsdn_fetch(); - extent_t *a = iealloc(tsdn, addr_a); - extent_t *b = iealloc(tsdn, addr_b); + edata_t *a = iealloc(tsdn, addr_a); + edata_t *b = iealloc(tsdn, addr_b); if (extent_head_no_merge(a, b)) { return true; } diff --git a/src/eset.c b/src/eset.c index 9cc8cee..88b9c8c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -16,10 +16,10 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, return true; } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&eset->heaps[i]); + edata_heap_new(&eset->heaps[i]); } bitmap_init(eset->bitmap, &eset_bitmap_info, true); - extent_list_init(&eset->lru); + edata_list_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; eset->delay_coalesce = delay_coalesce; @@ -63,24 +63,24 @@ eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { } void -eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { +eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); + assert(edata_state_get(edata) == eset->state); - size_t size = extent_size_get(extent); + size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->heaps[pind])) { bitmap_unset(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_heap_insert(&eset->heaps[pind], extent); + edata_heap_insert(&eset->heaps[pind], edata); if (config_stats) { eset_stats_add(eset, pind, size); } - extent_list_append(&eset->lru, extent); + edata_list_append(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -94,24 +94,24 @@ eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { } void -eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { +eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); + assert(edata_state_get(edata) == eset->state); - size_t size = extent_size_get(extent); + size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&eset->heaps[pind], extent); + edata_heap_remove(&eset->heaps[pind], edata); if (config_stats) { eset_stats_sub(eset, pind, size); } - if (extent_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->heaps[pind])) { bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_list_remove(&eset->lru, extent); + edata_list_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * As in eset_insert_locked, we hold eset->mtx and so don't need atomic @@ -128,7 +128,7 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { * Find an extent with size [min_size, max_size) to satisfy the alignment * requirement. For each size, try only the first extent in the heap. 
*/ -static extent_t * +static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); @@ -139,10 +139,10 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - uintptr_t base = (uintptr_t)extent_base_get(extent); - size_t candidate_size = extent_size_get(extent); + assert(!edata_heap_empty(&eset->heaps[i])); + edata_t *edata = edata_heap_first(&eset->heaps[i]); + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, @@ -154,7 +154,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t leadsize = next_align - base; if (candidate_size - leadsize >= min_size) { - return extent; + return edata; } } @@ -165,9 +165,9 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. */ -static extent_t * +static edata_t * eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { - extent_t *ret = NULL; + edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -176,8 +176,8 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { * No split / merge allowed (Windows w/o retain). Try exact fit * only. */ - return extent_heap_empty(&eset->heaps[pind]) ? NULL : - extent_heap_first(&eset->heaps[pind]); + return edata_heap_empty(&eset->heaps[pind]) ? NULL : + edata_heap_first(&eset->heaps[pind]); } for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, @@ -185,9 +185,9 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { i < SC_NPSIZES + 1; i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - assert(extent_size_get(extent) >= size); + assert(!edata_heap_empty(&eset->heaps[i])); + edata_t *edata = edata_heap_first(&eset->heaps[i]); + assert(edata_size_get(edata) >= size); /* * In order to reduce fragmentation, avoid reusing and splitting * large eset for much smaller sizes. @@ -198,8 +198,8 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { - ret = extent; + if (ret == NULL || edata_snad_comp(edata, ret) < 0) { + ret = edata; } if (i == SC_NPSIZES) { break; @@ -210,7 +210,7 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { return ret; } -extent_t * +edata_t * eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &eset->mtx); @@ -220,18 +220,18 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return NULL; } - extent_t *extent = eset_first_fit_locked(tsdn, eset, max_size); + edata_t *edata = eset_first_fit_locked(tsdn, eset, max_size); - if (alignment > PAGE && extent == NULL) { + if (alignment > PAGE && edata == NULL) { /* * max_size guarantees the alignment requirement but is rather * pessimistic. Next we try to satisfy the aligned allocation * with sizes in [esize, max_size). 
*/ - extent = eset_fit_alignment(eset, esize, max_size, alignment); + edata = eset_fit_alignment(eset, esize, max_size, alignment); } - return extent; + return edata; } void diff --git a/src/extent2.c b/src/extent2.c index 4001d17..5bacb8f 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -13,25 +13,25 @@ /* Data. */ rtree_t extents_rtree; -/* Keyed by the address of the extent_t being protected. */ +/* Keyed by the address of the edata_t being protected. */ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained); + edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, +static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -43,15 +43,15 @@ static atomic_zu_t highpages; * definition. 
*/ -static void extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +static void extent_deregister(tsdn_t *tsdn, edata_t *edata); +static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, +static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent, bool growing_retained); + eset_t *eset, edata_t *edata, bool growing_retained); /******************************************************************************/ @@ -62,68 +62,68 @@ typedef enum { } lock_result_t; static inline void -extent_lock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); } static inline void -extent_unlock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); } static inline void -extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); +extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); } static inline void -extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); +extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); } static lock_result_t extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result, bool inactive_only) { - extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, + edata_t **result, bool inactive_only) { + edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, true); /* Slab implies active extents and should be skipped. */ - if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, &extents_rtree, elm, true))) { return lock_result_no_extent; } /* * It's possible that the extent changed out from under us, and with it - * the leaf->extent mapping. We have to recheck while holding the lock. + * the leaf->edata mapping. We have to recheck while holding the lock. 
*/ - extent_lock(tsdn, extent1); - extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, elm, true); + extent_lock_edata(tsdn, edata1); + edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, + true); - if (extent1 == extent2) { - *result = extent1; + if (edata1 == edata2) { + *result = edata1; return lock_result_success; } else { - extent_unlock(tsdn, extent1); + extent_unlock_edata(tsdn, edata1); return lock_result_failure; } } /* - * Returns a pool-locked extent_t * if there's one associated with the given + * Returns a pool-locked edata_t * if there's one associated with the given * address, and NULL otherwise. */ -static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, +static edata_t * +extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only) { - extent_t *ret = NULL; + edata_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)addr, false, false); if (elm == NULL) { @@ -138,9 +138,9 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, } static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); + assert(edata_base_get(edata) == edata_addr_get(edata)); if (alignment < PAGE) { unsigned lg_range = LG_PAGE - @@ -156,52 +156,52 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + + edata->e_addr = (void *)((uintptr_t)edata->e_addr + random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == + edata->e_addr); } } -extent_t * +edata_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_t *extent = extent_avail_first(&arena->extent_avail); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return base_alloc_extent(tsdn, arena->base); - } - extent_avail_remove(&arena->extent_avail, extent); - atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return extent; + malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); + edata_t *edata = edata_avail_first(&arena->edata_avail); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); + return base_alloc_edata(tsdn, arena->base); + } + edata_avail_remove(&arena->edata_avail, edata); + atomic_fetch_sub_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); + return edata; } void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_avail_insert(&arena->extent_avail, extent); - atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); +extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { + malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); + edata_avail_insert(&arena->edata_avail, edata); + atomic_fetch_add_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); } static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t 
*rtree_ctx, eset_t *eset, extent_t *extent) { - extent_state_set(extent, extent_state_active); + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { + edata_state_set(edata, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, - extent, &coalesced, false); - extent_state_set(extent, eset_state_get(eset)); + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + edata, &coalesced, false); + edata_state_set(edata, eset_state_get(eset)); if (!coalesced) { return true; } - eset_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, edata); return false; } -extent_t * +edata_t * extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -210,28 +210,28 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; } void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - assert(extent_dumpable_get(extent)); + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); + assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, false); + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, eset, extent, false); + extent_record(tsdn, arena, ehooks, eset, edata, false); } -extent_t * +edata_t * extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; @@ -243,27 +243,27 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Get the LRU coalesced extent, if any. If coalescing was delayed, * the loop will iterate until the LRU extent is fully coalesced. */ - extent_t *extent; + edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - extent = extent_list_first(&eset->lru); - if (extent == NULL) { + edata = edata_list_first(&eset->lru); + if (edata == NULL) { goto label_return; } /* Check the eviction limit. */ size_t extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); if (extents_npages <= npages_min) { - extent = NULL; + edata = NULL; goto label_return; } - eset_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, edata); if (!eset->delay_coalesce) { break; } /* Try to coalesce. 
*/ if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent)) { + eset, edata)) { break; } /* @@ -281,10 +281,10 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, not_reached(); case extent_state_dirty: case extent_state_muzzy: - extent_state_set(extent, extent_state_active); + edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, extent); + extent_deregister(tsdn, edata); break; default: not_reached(); @@ -292,7 +292,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, label_return: malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; + return edata; } /* @@ -301,8 +301,8 @@ label_return: */ static void extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - size_t sz = extent_size_get(extent); + edata_t *edata, bool growing_retained) { + size_t sz = edata_size_get(edata); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); } @@ -311,56 +311,56 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * that this is only a virtual memory leak. */ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained); + extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), growing_retained); } } - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extent_state_active); + edata_t *edata) { + assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); + assert(edata_state_get(edata) == extent_state_active); - extent_state_set(extent, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, extent); + edata_state_set(edata, eset_state_get(eset)); + eset_insert_locked(tsdn, eset, edata); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { + edata_t *edata) { malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, extent); + extent_deactivate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == eset_state_get(eset)); + edata_t *edata) { + assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); + assert(edata_state_get(edata) == eset_state_get(eset)); - eset_remove_locked(tsdn, eset, extent); - extent_state_set(extent, extent_state_active); + eset_remove_locked(tsdn, eset, edata); + edata_state_set(edata, extent_state_active); } static bool extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent, bool dependent, bool init_missing, + const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), dependent, init_missing); + (uintptr_t)edata_base_get(edata), dependent, init_missing); if 
(!dependent && *r_elm_a == NULL) { return true; } assert(*r_elm_a != NULL); *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_last_get(extent), dependent, init_missing); + (uintptr_t)edata_last_get(edata), dependent, init_missing); if (!dependent && *r_elm_b == NULL) { return true; } @@ -371,36 +371,36 @@ extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); + rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, slab); } } static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind) { - assert(extent_slab_get(extent)); + assert(edata_slab_get(edata)); /* Register interior. */ - for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_write(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent, szind, true); + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE), edata, szind, true); } } static void -extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. */ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nadd = extent_size_get(extent) >> LG_PAGE; + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nadd = edata_size_get(edata) >> LG_PAGE; size_t cur = atomic_fetch_add_zu(&curpages, nadd, ATOMIC_RELAXED) + nadd; size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); @@ -419,18 +419,18 @@ extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { } static void -extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nsub = extent_size_get(extent) >> LG_PAGE; + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nsub = edata_size_get(edata) >> LG_PAGE; assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); } } static bool -extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -439,43 +439,43 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - extent_lock(tsdn, extent); + extent_lock_edata(tsdn, edata); - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, &elm_a, &elm_b)) { - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); return true; } - szind_t szind = extent_szind_get_maybe_invalid(extent); - bool slab = extent_slab_get(extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); + szind_t szind = edata_szind_get_maybe_invalid(edata); + bool slab = edata_slab_get(edata); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); if (slab) { - extent_interior_register(tsdn, rtree_ctx, extent, szind); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); if (config_prof && gdump_add) { - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } return false; } static bool -extent_register(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, true); +extent_register(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, false); +extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, false); } static void -extent_reregister(tsdn_t *tsdn, extent_t *extent) { - bool err = extent_register(tsdn, extent); +extent_reregister(tsdn_t *tsdn, edata_t *edata) { + bool err = extent_register(tsdn, edata); assert(!err); } @@ -488,14 +488,14 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - extent_t *extent) { + edata_t *edata) { size_t i; - assert(extent_slab_get(extent)); + assert(edata_slab_get(edata)); - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_clear(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE)); } } @@ -504,43 +504,43 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, * Removes all pointers to the given extent from the global rtree. 
*/ static void -extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { +extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, &elm_a, &elm_b); - extent_lock(tsdn, extent); + extent_lock_edata(tsdn, edata); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, edata); + edata_slab_set(edata, false); } - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); if (config_prof && gdump) { - extent_gdump_sub(tsdn, extent); + extent_gdump_sub(tsdn, edata); } } static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, true); +extent_deregister(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, false); +extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, false); } /* * Tries to find and remove an extent from eset that can be used for the * given allocation request. */ -static extent_t * +static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { @@ -566,62 +566,60 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t esize = size + pad; malloc_mutex_lock(tsdn, &eset->mtx); - extent_t *extent; + edata_t *edata; if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, false); - if (extent != NULL) { + if (edata != NULL) { /* - * We might null-out extent to report an error, but we + * We might null-out edata to report an error, but we * still need to unlock the associated mutex after. 
*/ - extent_t *unlock_extent = extent; - assert(extent_base_get(extent) == new_addr); - if (extent_arena_ind_get(extent) - != arena_ind_get(arena) || - extent_size_get(extent) < esize || - extent_state_get(extent) != - eset_state_get(eset)) { - extent = NULL; + edata_t *unlock_edata = edata; + assert(edata_base_get(edata) == new_addr); + if (edata_arena_ind_get(edata) != arena_ind_get(arena) + || edata_size_get(edata) < esize + || edata_state_get(edata) != eset_state_get(eset)) { + edata = NULL; } - extent_unlock(tsdn, unlock_extent); + extent_unlock_edata(tsdn, unlock_edata); } } else { - extent = eset_fit_locked(tsdn, eset, esize, alignment); + edata = eset_fit_locked(tsdn, eset, esize, alignment); } - if (extent == NULL) { + if (edata == NULL) { malloc_mutex_unlock(tsdn, &eset->mtx); return NULL; } - extent_activate_locked(tsdn, arena, eset, extent); + extent_activate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; + return edata; } /* * Given an allocation request and an extent guaranteed to be able to satisfy - * it, this splits off lead and trail extents, leaving extent pointing to an + * it, this splits off lead and trail extents, leaving edata pointing to an * extent satisfying the allocation. * This function doesn't put lead or trail into any eset_t; it's the caller's * job to ensure that they can be reused. */ typedef enum { /* - * Split successfully. lead, extent, and trail, are modified to extents + * Split successfully. lead, edata, and trail, are modified to extents * describing the ranges before, in, and after the given allocation. */ extent_split_interior_ok, /* * The extent can't satisfy the given allocation request. None of the - * input extent_t *s are touched. + * input edata_t *s are touched. */ extent_split_interior_cant_alloc, /* * In a potentially invalid state. Must leak (if *to_leak is non-NULL), * and salvage what's still salvageable (if *to_salvage is non-NULL). - * None of lead, extent, or trail are valid. + * None of lead, edata, or trail are valid. */ extent_split_interior_error } extent_split_interior_result_t; @@ -630,19 +628,19 @@ static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ - extent_t **extent, extent_t **lead, extent_t **trail, + edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ - extent_t **to_leak, extent_t **to_salvage, + edata_t **to_leak, edata_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), + PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); - if (extent_size_get(*extent) < leadsize + esize) { + if (edata_size_get(*edata) < leadsize + esize) { return extent_split_interior_cant_alloc; } - size_t trailsize = extent_size_get(*extent) - leadsize - esize; + size_t trailsize = edata_size_get(*edata) - leadsize - esize; *lead = NULL; *trail = NULL; @@ -651,11 +649,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. 
*/ if (leadsize != 0) { - *lead = *extent; - *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + *lead = *edata; + *edata = extent_split_impl(tsdn, arena, ehooks, *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (*extent == NULL) { + if (*edata == NULL) { *to_leak = *lead; *lead = NULL; return extent_split_interior_error; @@ -664,13 +662,13 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + *trail = extent_split_impl(tsdn, arena, ehooks, *edata, esize, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { - *to_leak = *extent; + *to_leak = *edata; *to_salvage = *lead; *lead = NULL; - *extent = NULL; + *edata = NULL; return extent_split_interior_error; } } @@ -680,14 +678,14 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_szind_set(*extent, szind); + edata_szind_set(*edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(*extent), szind, slab); - if (slab && extent_size_get(*extent) > PAGE) { + (uintptr_t)edata_addr_get(*edata), szind, slab); + if (slab && edata_size_get(*edata) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(*extent) - + (uintptr_t)edata_past_get(*edata) - (uintptr_t)PAGE, szind, slab); } } @@ -702,18 +700,18 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * before or after the resulting allocation, that space is given its own extent * and put back into eset. */ -static extent_t * +static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); @@ -735,7 +733,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (trail != NULL) { extent_deactivate(tsdn, arena, eset, trail); } - return extent; + return edata; } else { /* * We should have picked an extent that was large enough to @@ -746,11 +744,11 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { - void *leak = extent_base_get(to_leak); + void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, growing_retained); - assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, false) == NULL); } return NULL; @@ -762,7 +760,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Tries to satisfy the given allocation request by reusing one of the extents * in the given eset_t. 
*/ -static extent_t * +static edata_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { @@ -775,54 +773,54 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, + edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, - new_addr, size, pad, alignment, slab, szind, extent, + edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, edata, growing_retained); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, extent, + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, edata, growing_retained); return NULL; } } - if (extent_committed_get(extent)) { + if (edata_committed_get(edata)) { *commit = true; } - if (extent_zeroed_get(extent)) { + if (edata_zeroed_get(edata)) { *zero = true; } if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } - assert(extent_state_get(extent) == extent_state_active); + assert(edata_state_get(edata) == extent_state_active); if (slab) { - extent_slab_set(extent, slab); - extent_interior_register(tsdn, rtree_ctx, extent, szind); + edata_slab_set(edata, slab); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } if (*zero) { - void *addr = extent_base_get(extent); - if (!extent_zeroed_get(extent)) { - size_t size = extent_size_get(extent); + void *addr = edata_base_get(edata); + if (!edata_zeroed_get(edata)) { + size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } } - return extent; + return edata; } /* @@ -830,7 +828,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * to split requested extents in order to limit the total number of disjoint * virtual memory ranges retained by each arena. 
*/ -static extent_t * +static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -860,8 +858,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { + edata_t *edata = extent_alloc(tsdn, arena); + if (edata == NULL) { goto label_err; } bool zeroed = false; @@ -870,35 +868,35 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); - extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); goto label_err; } - if (extent_register_no_gdump_add(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register_no_gdump_add(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); goto label_err; } - if (extent_zeroed_get(extent) && extent_committed_get(extent)) { + if (edata_zeroed_get(edata) && edata_committed_get(edata)) { *zero = true; } - if (extent_committed_get(extent)) { + if (edata_committed_get(edata)) { *commit = true; } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { @@ -931,16 +929,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_err; } - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), true)) { + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, extent, true); + &arena->eset_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ if (config_debug) { - void *addr = extent_addr_get(extent); + void *addr = edata_addr_get(edata); size_t *p = (size_t *)(uintptr_t)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -964,32 +962,32 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } if (slab) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_slab_set(extent, true); - extent_interior_register(tsdn, rtree_ctx, extent, szind); + edata_slab_set(edata, true); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } - if (*zero && !extent_zeroed_get(extent)) { - void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); + if (*zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } - return extent; + return edata; label_err: malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); return NULL; } -static extent_t * +static edata_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -998,16 +996,16 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, ehooks, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); - if (extent != NULL) { + if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); if (config_prof) { - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { @@ -1015,49 +1013,49 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); - return extent; + return edata; } -static extent_t * +static edata_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { + edata_t *edata = extent_alloc(tsdn, arena); + if (edata == NULL) { return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); if (addr == NULL) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); return NULL; } - extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, + edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); return NULL; } - return extent; + return edata; } -extent_t * +edata_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); - if (extent == NULL) { + if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* * When retain is enabled and new_addr is set, we do not @@ -1067,28 +1065,28 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, + edata = extent_alloc_wrapper_hard(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; } static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, - const extent_t *outer) { - assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); - if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { +extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, + const edata_t *outer) { + assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); + if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { return false; } - assert(extent_state_get(inner) == extent_state_active); - if (extent_state_get(outer) != eset->state) { + assert(edata_state_get(inner) == extent_state_active); + if (edata_state_get(outer) != eset->state) { return false; } - if (extent_committed_get(inner) != extent_committed_get(outer)) { + if (edata_committed_get(inner) != edata_committed_get(outer)) { return false; } @@ -1097,7 +1095,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t 
*inner, extent_t *outer, bool forward, bool growing_retained) { + edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); extent_activate_locked(tsdn, arena, eset, outer); @@ -1114,9 +1112,9 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return err; } -static extent_t * +static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -1132,8 +1130,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent), inactive_only); + edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_past_get(edata), inactive_only); if (next != NULL) { /* * eset->mtx only protects against races for @@ -1141,38 +1139,38 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * before releasing next's pool lock. */ bool can_coalesce = extent_can_coalesce(arena, eset, - extent, next); + edata, next); - extent_unlock(tsdn, next); + extent_unlock_edata(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, next, true, + ehooks, eset, edata, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; - return extent; + return edata; } again = true; } } /* Try to coalesce backward. */ - extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent), inactive_only); + edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(arena, eset, - extent, prev); - extent_unlock(tsdn, prev); + edata, prev); + extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, prev, false, + ehooks, eset, edata, prev, false, growing_retained)) { - extent = prev; + edata = prev; if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; - return extent; + return edata; } again = true; } @@ -1182,23 +1180,23 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (eset->delay_coalesce) { *coalesced = false; } - return extent; + return edata; } -static extent_t * +static edata_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, false); + edata, coalesced, growing_retained, false); } -static extent_t * +static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, true); + edata, coalesced, growing_retained, true); } /* @@ -1207,62 +1205,62 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { + edata_t *edata, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); assert((eset_state_get(eset) != extent_state_dirty && eset_state_get(eset) != extent_state_muzzy) || - !extent_zeroed_get(extent)); + !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &eset->mtx); - extent_szind_set(extent, SC_NSIZES); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); + edata_szind_set(edata, SC_NSIZES); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, edata); + edata_slab_set(edata, false); } - assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), true) == extent); + assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), true) == edata); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, edata, NULL, growing_retained); + } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. */ bool coalesced; do { - assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, extent, &coalesced, + assert(edata_state_get(edata) == extent_state_active); + edata = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, edata, &coalesced, growing_retained); } while (coalesced); - if (extent_size_get(extent) >= oversize_threshold) { + if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, ehooks, extent); + arena_decay_extent(tsdn, arena, ehooks, edata); return; } } - extent_deactivate_locked(tsdn, arena, eset, extent); + extent_deactivate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); return; } - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); } static bool @@ -1273,23 +1271,23 @@ extent_may_dalloc(void) { static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { + edata_t *edata) { bool err; - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_addr_set(extent, extent_base_get(extent)); + edata_addr_set(edata, edata_base_get(edata)); /* Try to deallocate. */ - err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), + err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); if (!err) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } return err; @@ -1297,8 +1295,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_dumpable_get(extent)); + edata_t *edata) { + assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1308,124 +1306,123 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { + extent_deregister(tsdn, edata); + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { return; } - extent_reregister(tsdn, extent); + extent_reregister(tsdn, edata); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; - if (!extent_committed_get(extent)) { + if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent))) { + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata))) { zeroed = true; - } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), + } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata), arena_ind_get(arena))) { zeroed = true; - } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), + } else if (edata_state_get(edata) == extent_state_muzzy || + !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata), arena_ind_get(arena))) { zeroed = false; } else { zeroed = false; } - extent_zeroed_set(extent, zeroed); + edata_zeroed_set(edata, zeroed); if (config_prof) { - extent_gdump_sub(tsdn, extent); + extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, - false); + extent_record(tsdn, arena, ehooks, &arena->eset_retained, edata, false); } void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, extent); + extent_deregister(tsdn, edata); - extent_addr_set(extent, extent_base_get(extent)); + edata_addr_set(edata, edata_base_get(edata)); /* Try to destroy; silently fail otherwise. */ - ehooks_destroy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), + ehooks_destroy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) || !err); + bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_committed_set(edata, edata_committed_get(edata) || !err); return err; } bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, + edata_t *edata, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + return extent_commit_impl(tsdn, arena, ehooks, edata, offset, length, false); } bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { + edata_t *edata, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) && err); + bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_committed_set(edata, edata_committed_get(edata) && err); return err; } static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); + bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); return err; } bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, length, false); } static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); + bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); return err; } bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, extent, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, edata, offset, length, false); } @@ -1436,11 +1433,11 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * with the trail (the higher addressed portion). This makes 'extent' the lead, * and returns the trail (except in case of error). 
*/ -static extent_t * +static edata_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { - assert(extent_size_get(extent) == size_a + size_b); + assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1448,28 +1445,28 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - extent_t *trail = extent_alloc(tsdn, arena); + edata_t *trail = extent_alloc(tsdn, arena); if (trail == NULL) { goto label_error_a; } - extent_init(trail, arena_ind_get(arena), - (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, - slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_dumpable_get(extent), EXTENT_NOT_HEAD); + edata_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_dumpable_get(edata), EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; { - extent_t lead; + edata_t lead; - extent_init(&lead, arena_ind_get(arena), - extent_addr_get(extent), size_a, - slab_a, szind_a, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), + edata_init(&lead, arena_ind_get(arena), + edata_addr_get(edata), size_a, + slab_a, szind_a, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), edata_dumpable_get(edata), EXTENT_NOT_HEAD); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, @@ -1484,40 +1481,40 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_error_b; } - extent_lock2(tsdn, extent, trail); + extent_lock_edata2(tsdn, edata, trail); - bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), - size_a + size_b, size_a, size_b, extent_committed_get(extent), + bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), + size_a + size_b, size_a, size_b, edata_committed_get(edata), arena_ind_get(arena)); if (err) { goto label_error_c; } - extent_size_set(extent, size_a); - extent_szind_set(extent, szind_a); + edata_size_set(edata, size_a); + edata_szind_set(edata, szind_a); - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, szind_a, slab_a); extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, szind_b, slab_b); - extent_unlock2(tsdn, extent, trail); + extent_unlock_edata2(tsdn, edata, trail); return trail; label_error_c: - extent_unlock2(tsdn, extent, trail); + extent_unlock_edata2(tsdn, edata, trail); label_error_b: extent_dalloc(tsdn, arena, trail); label_error_a: return NULL; } -extent_t * +edata_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + return extent_split_impl(tsdn, 
arena, ehooks, edata, size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } @@ -1526,8 +1523,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * settings. Assumes the second extent has the higher address. */ bool -extent_head_no_merge(extent_t *a, extent_t *b) { - assert(extent_base_get(a) < extent_base_get(b)); +extent_head_no_merge(edata_t *a, edata_t *b) { + assert(edata_base_get(a) < edata_base_get(b)); /* * When coalesce is not always allowed (Windows), only merge extents * from the same VirtualAlloc region under opt.retain (in which case @@ -1540,33 +1537,33 @@ extent_head_no_merge(extent_t *a, extent_t *b) { return true; } /* If b is a head extent, disallow the cross-region merge. */ - if (extent_is_head_get(b)) { + if (edata_is_head_get(b)) { /* * Additionally, sn should not overflow with retain; sanity * check that different regions have unique sn. */ - assert(extent_sn_comp(a, b) != 0); + assert(edata_sn_comp(a, b) != 0); return true; } - assert(extent_sn_comp(a, b) == 0); + assert(edata_sn_comp(a, b) == 0); return false; } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, - extent_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, + edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(extent_base_get(a) < extent_base_get(b)); + assert(edata_base_get(a) < edata_base_get(b)); if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { return true; } - bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); + bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), + edata_size_get(a), edata_base_get(b), edata_size_get(b), + edata_committed_get(a), arena_ind_get(arena)); if (err) { return true; @@ -1585,7 +1582,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, &b_elm_b); - extent_lock2(tsdn, a, b); + extent_lock_edata2(tsdn, a, b); if (a_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, @@ -1598,22 +1595,22 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, b_elm_b = b_elm_a; } - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, SC_NSIZES); - extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? - extent_sn_get(a) : extent_sn_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + edata_szind_set(a, SC_NSIZES); + edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? + edata_sn_get(a) : edata_sn_get(b)); + edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, false); - extent_unlock2(tsdn, a, b); + extent_unlock_edata2(tsdn, a, b); /* * If we got here, we merged the extents; so they must be from the same * arena (i.e. this one). 
*/ - assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + assert(edata_arena_ind_get(b) == arena_ind_get(arena)); extent_dalloc(tsdn, arena, b); return false; @@ -1621,7 +1618,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b) { + edata_t *a, edata_t *b) { return extent_merge_impl(tsdn, arena, ehooks, a, b, false); } diff --git a/src/extent_dss.c b/src/extent_dss.c index 59e7e7d..a66afb6 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -109,7 +109,7 @@ extent_dss_max_update(void *new_addr) { void * extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - extent_t *gap; + edata_t *gap; cassert(have_dss); assert(size > 0); @@ -153,7 +153,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { - extent_init(gap, arena_ind_get(arena), + edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, false, true, true, @@ -194,17 +194,17 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - extent_t extent; + edata_t edata; ehooks_t *ehooks = arena_get_ehooks( arena); - extent_init(&extent, + edata_init(&edata, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, ehooks, &extent, 0, size)) { + arena, ehooks, &edata, 0, size)) { memset(ret, 0, size); } } diff --git a/src/inspect.c b/src/inspect.c index 435016e..5ad23a0 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,21 +6,21 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { + const edata_t *edata = iealloc(tsdn, ptr); + if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; } - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { + *size = edata_size_get(edata); + if (!edata_slab_get(edata)) { *nfree = 0; *nregs = 1; } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; + *nfree = edata_nfree_get(edata); + *nregs = bin_infos[edata_szind_get(edata)].nregs; assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); + assert(*nfree * edata_usize_get(edata) <= *size); } } @@ -31,31 +31,31 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { + const edata_t *edata = iealloc(tsdn, ptr); + if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; return; } - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { + *size = edata_size_get(edata); + if (!edata_slab_get(edata)) { *nfree = *bin_nfree = *bin_nregs = 0; *nregs = 1; *slabcur_addr = NULL; return; } - *nfree = extent_nfree_get(extent); - const szind_t szind = extent_szind_get(extent); + *nfree = 
edata_nfree_get(edata); + const szind_t szind = edata_szind_get(edata); *nregs = bin_infos[szind].nregs; assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); + assert(*nfree * edata_usize_get(edata) <= *size); const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); + const unsigned binshard = edata_binshard_get(edata); bin_t *bin = &arena->bins[szind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); @@ -66,12 +66,12 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, } else { *bin_nfree = *bin_nregs = 0; } - extent_t *slab; + edata_t *slab; if (bin->slabcur != NULL) { slab = bin->slabcur; } else { - slab = extent_heap_first(&bin->slabs_nonfull); + slab = edata_heap_first(&bin->slabs_nonfull); } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + *slabcur_addr = slab != NULL ? edata_addr_get(slab) : NULL; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/src/large.c b/src/large.c index 6fd21be..67b4745 100644 --- a/src/large.c +++ b/src/large.c @@ -21,7 +21,7 @@ void * large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { size_t ausize; - extent_t *extent; + edata_t *edata; bool is_zeroed; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); @@ -44,28 +44,28 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } - if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, + if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { return NULL; } /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { - /* Insert extent into large. */ + /* Insert edata into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_append(&arena->large, extent); + edata_list_append(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } if (zero) { assert(is_zeroed); } else if (config_fill && unlikely(opt_junk_alloc)) { - memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, - extent_usize_get(extent)); + memset(edata_addr_get(edata), JEMALLOC_ALLOC_JUNK, + edata_usize_get(edata)); } arena_decay_tick(tsdn, arena); - return extent_addr_get(extent); + return edata_addr_get(edata); } static void @@ -90,11 +90,11 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = large_dalloc_maybe_junk_impl; static bool -large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = arena_get_from_extent(extent); - size_t oldusize = extent_usize_get(extent); +large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { + arena_t *arena = arena_get_from_edata(edata); + size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t diff = extent_size_get(extent) - (usize + sz_large_pad); + size_t diff = edata_size_get(edata) - (usize + sz_large_pad); assert(oldusize > usize); @@ -104,31 +104,31 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { /* Split excess pages. 
*/ if (diff != 0) { - extent_t *trail = extent_split_wrapper(tsdn, arena, - ehooks, extent, usize + sz_large_pad, sz_size2index(usize), + edata_t *trail = extent_split_wrapper(tsdn, arena, + ehooks, edata, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } if (config_fill && unlikely(opt_junk_free)) { - large_dalloc_maybe_junk(extent_addr_get(trail), - extent_size_get(trail)); + large_dalloc_maybe_junk(edata_addr_get(trail), + edata_size_get(trail)); } arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } - arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_shrink(tsdn, arena, edata, oldusize); return false; } static bool -large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, +large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { - arena_t *arena = arena_get_from_extent(extent); - size_t oldusize = extent_usize_get(extent); + arena_t *arena = arena_get_from_edata(edata); + size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); size_t trailsize = usize - oldusize; @@ -147,20 +147,20 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, */ bool is_zeroed_trail = zero; bool commit = true; - extent_t *trail; + edata_t *trail; bool new_mapping; if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, - extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, - extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, - extent_past_get(extent), trailsize, 0, CACHELINE, false, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; } @@ -169,16 +169,16 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, ehooks, extent, trail)) { + if (extent_merge_wrapper(tsdn, arena, ehooks, edata, trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); szind_t szind = sz_size2index(usize); - extent_szind_set(extent, szind); + edata_szind_set(edata, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, false); + (uintptr_t)edata_addr_get(edata), szind, false); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); @@ -194,7 +194,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, * of CACHELINE in [0 .. PAGE). 
*/ void *zbase = (void *) - ((uintptr_t)extent_addr_get(extent) + oldusize); + ((uintptr_t)edata_addr_get(edata) + oldusize); void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + PAGE)); size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; @@ -203,19 +203,19 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } assert(is_zeroed_trail); } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize), + memset((void *)((uintptr_t)edata_addr_get(edata) + oldusize), JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); return false; } bool -large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, size_t usize_max, bool zero) { - size_t oldusize = extent_usize_get(extent); + size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); @@ -225,16 +225,15 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ - if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, + if (!large_ralloc_no_move_expand(tsdn, edata, usize_max, zero)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize && - large_ralloc_no_move_expand(tsdn, extent, usize_min, - zero)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + large_ralloc_no_move_expand(tsdn, edata, usize_min, zero)) { + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } } @@ -244,14 +243,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. */ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Attempt to shrink the allocation in-place. */ if (oldusize > usize_max) { - if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + if (!large_ralloc_no_move_shrink(tsdn, edata, usize_max)) { + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } } @@ -271,9 +270,9 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - extent_t *extent = iealloc(tsdn, ptr); + edata_t *edata = iealloc(tsdn, ptr); - size_t oldusize = extent_usize_get(extent); + size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ @@ -281,11 +280,11 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ - if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { + if (!large_ralloc_no_move(tsdn, edata, usize, usize, zero)) { hook_invoke_expand(hook_args->is_realloc ? 
hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, usize, (uintptr_t)ptr, hook_args->args); - return extent_addr_get(extent); + return edata_addr_get(edata); } /* @@ -306,8 +305,8 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); size_t copysize = (usize < oldusize) ? usize : oldusize; - memcpy(ret, extent_addr_get(extent), copysize); - isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true); + memcpy(ret, edata_addr_get(edata), copysize); + isdalloct(tsdn, edata_addr_get(edata), oldusize, tcache, NULL, true); return ret; } @@ -316,76 +315,75 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * whether the arena's large_mtx is currently held. */ static void -large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool junked_locked) { if (!junked_locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); + edata_list_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - large_dalloc_maybe_junk(extent_addr_get(extent), - extent_usize_get(extent)); + large_dalloc_maybe_junk(edata_addr_get(edata), + edata_usize_get(edata)); } else { /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { malloc_mutex_assert_owner(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); + edata_list_remove(&arena->large, edata); } } - arena_extent_dalloc_large_prep(tsdn, arena, extent); + arena_extent_dalloc_large_prep(tsdn, arena, edata); } static void -large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, extent); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, edata); } void -large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, arena_get_from_extent(extent), extent, - true); +large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata) { + large_dalloc_prep_impl(tsdn, arena_get_from_edata(edata), edata, true); } void -large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, arena_get_from_extent(extent), extent); +large_dalloc_finish(tsdn_t *tsdn, edata_t *edata) { + large_dalloc_finish_impl(tsdn, arena_get_from_edata(edata), edata); } void -large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = arena_get_from_extent(extent); - large_dalloc_prep_impl(tsdn, arena, extent, false); - large_dalloc_finish_impl(tsdn, arena, extent); +large_dalloc(tsdn_t *tsdn, edata_t *edata) { + arena_t *arena = arena_get_from_edata(edata); + large_dalloc_prep_impl(tsdn, arena, edata, false); + large_dalloc_finish_impl(tsdn, arena, edata); arena_decay_tick(tsdn, arena); } size_t -large_salloc(tsdn_t *tsdn, const extent_t *extent) { - return extent_usize_get(extent); +large_salloc(tsdn_t *tsdn, const edata_t *edata) { + return edata_usize_get(edata); } void -large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { - extent_prof_info_get(extent, prof_info); +large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { + edata_prof_info_get(edata, prof_info); } static void -large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - 
extent_prof_tctx_set(extent, tctx); +large_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { + edata_prof_tctx_set(edata, tctx); } void -large_prof_tctx_reset(extent_t *extent) { - large_prof_tctx_set(extent, (prof_tctx_t *)(uintptr_t)1U); +large_prof_tctx_reset(edata_t *edata) { + large_prof_tctx_set(edata, (prof_tctx_t *)(uintptr_t)1U); } void -large_prof_info_set(extent_t *extent, prof_tctx_t *tctx) { - large_prof_tctx_set(extent, tctx); +large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { + large_prof_tctx_set(edata, tctx); nstime_t t; nstime_init_update(&t); - extent_prof_alloc_time_set(extent, &t); + edata_prof_alloc_time_set(edata, &t); } diff --git a/src/tcache.c b/src/tcache.c index 7922e59..0a511e2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -114,8 +114,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, - size_t nflush, extent_t **extents){ +tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, + size_t nflush, edata_t **edatas){ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -129,9 +129,9 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t sz_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_extent_szind_read(tsdn, &extents_rtree, + rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)*(bottom_item - i), true, - &extents[i], &szind); + &edatas[i], &szind); sz_sum -= szind; } if (sz_sum != 0) { @@ -154,26 +154,26 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(extent_t *, item_extent, nflush); + VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - /* Look up extent once per item. */ + /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, - nflush, item_extent); + tbin_edatas_lookup_size_check(tsd_tsdn(tsd), tbin, binind, + nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), + item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } } while (nflush > 0) { /* Lock the arena bin associated with the first object. 
*/ - extent_t *extent = item_extent[0]; - unsigned bin_arena_ind = extent_arena_ind_get(extent); + edata_t *edata = item_edata[0]; + unsigned bin_arena_ind = edata_arena_ind_get(edata); arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, false); - unsigned binshard = extent_binshard_get(extent); + unsigned binshard = edata_binshard_get(edata); assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; @@ -187,13 +187,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); - extent = item_extent[i]; - assert(ptr != NULL && extent != NULL); + edata = item_edata[i]; + assert(ptr != NULL && edata != NULL); - if (extent_arena_ind_get(extent) == bin_arena_ind - && extent_binshard_get(extent) == binshard) { + if (edata_arena_ind_get(edata) == bin_arena_ind + && edata_binshard_get(edata) == binshard) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, bin, binind, extent, ptr); + bin_arena, bin, binind, edata, ptr); } else { /* * This object was allocated via a different @@ -202,7 +202,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * handled in a future pass. */ *(bottom_item - ndeferred) = ptr; - item_extent[ndeferred] = extent; + item_edata[ndeferred] = edata; ndeferred++; } } @@ -244,22 +244,22 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(extent_t *, item_extent, nflush); + VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK - /* Look up extent once per item. */ + /* Look up edata once per item. */ for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); + item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_extent); + item_edata); #endif while (nflush > 0) { /* Lock the arena associated with the first object. 
*/ - extent_t *extent = item_extent[0]; - unsigned locked_arena_ind = extent_arena_ind_get(extent); + edata_t *edata = item_edata[0]; + unsigned locked_arena_ind = edata_arena_ind_get(edata); arena_t *locked_arena = arena_get(tsd_tsdn(tsd), locked_arena_ind, false); @@ -270,10 +270,10 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); assert(ptr != NULL); - extent = item_extent[i]; - if (extent_arena_ind_get(extent) == locked_arena_ind) { + edata = item_edata[i]; + if (edata_arena_ind_get(edata) == locked_arena_ind) { large_dalloc_prep_junked_locked(tsd_tsdn(tsd), - extent); + edata); } } if ((config_prof || config_stats) && @@ -293,11 +293,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); - extent = item_extent[i]; - assert(ptr != NULL && extent != NULL); + edata = item_edata[i]; + assert(ptr != NULL && edata != NULL); - if (extent_arena_ind_get(extent) == locked_arena_ind) { - large_dalloc_finish(tsd_tsdn(tsd), extent); + if (edata_arena_ind_get(edata) == locked_arena_ind) { + large_dalloc_finish(tsd_tsdn(tsd), edata); } else { /* * This object was allocated via a different @@ -306,7 +306,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t * in a future pass. */ *(bottom_item - ndeferred) = ptr; - item_extent[ndeferred] = extent; + item_edata[ndeferred] = edata; ndeferred++; } } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index b182f31..854799d 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -63,17 +63,17 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent; + edata_t *edata; szind_t szind; - if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, false, &extent, &szind)) { + if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &edata, &szind)) { return 0; } - if (extent == NULL) { + if (edata == NULL) { return 0; } - if (extent_state_get(extent) != extent_state_active) { + if (edata_state_get(edata) != extent_state_active) { return 0; } diff --git a/test/unit/base.c b/test/unit/base.c index 7ced15f..3b848ca 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -168,14 +168,14 @@ TEST_BEGIN(test_base_hooks_not_null) { * that the first block's remaining space is considered for subsequent * allocation. */ - assert_zu_ge(extent_bsize_get(&base->blocks->extent), QUANTUM, + assert_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, "Remainder insufficient for test"); /* Use up all but one quantum of block. 
*/ - while (extent_bsize_get(&base->blocks->extent) > QUANTUM) { + while (edata_bsize_get(&base->blocks->edata) > QUANTUM) { p = base_alloc(tsdn, base, QUANTUM, QUANTUM); assert_ptr_not_null(p, "Unexpected base_alloc() failure"); } - r_exp = extent_addr_get(&base->blocks->extent); + r_exp = edata_addr_get(&base->blocks->edata); assert_zu_eq(base->extent_sn_next, 1, "One extant block expected"); q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM); assert_ptr_not_null(q, "Unexpected base_alloc() failure"); diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d7a8df8..d9a0d59 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -53,7 +53,7 @@ TEST_END static void * thd_start(void *varg) { void *ptr, *ptr2; - extent_t *extent; + edata_t *edata; unsigned shard1, shard2; tsdn_t *tsdn = tsdn_fetch(); @@ -62,13 +62,13 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - extent = iealloc(tsdn, ptr); - shard1 = extent_binshard_get(extent); + edata = iealloc(tsdn, ptr); + shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - extent = iealloc(tsdn, ptr2); - shard2 = extent_binshard_get(extent); + edata = iealloc(tsdn, ptr2); + shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 9105e3e..2477db0 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -75,8 +75,8 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, - false), "rtree_extent_read() should return NULL for empty tree"); + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, + false), "rtree_edata_read() should return NULL for empty tree"); rtree_delete(tsdn, rtree); } TEST_END @@ -86,11 +86,11 @@ TEST_END #undef SEED TEST_BEGIN(test_rtree_extrema) { - extent_t extent_a, extent_b; - extent_init(&extent_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + edata_t edata_a, edata_b; + edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - extent_init(&extent_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -100,21 +100,21 @@ TEST_BEGIN(test_rtree_extrema) { rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &extent_a, - extent_szind_get(&extent_a), extent_slab_get(&extent_a)), + assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, + edata_szind_get(&edata_a), edata_slab_get(&edata_a)), "Unexpected rtree_write() failure"); rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, - extent_szind_get(&extent_a), extent_slab_get(&extent_a)); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, true), - &extent_a, - "rtree_extent_read() should return previously set value"); + edata_szind_get(&edata_a), edata_slab_get(&edata_a)); + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), + &edata_a, + "rtree_edata_read() should return previously set value"); assert_false(rtree_write(tsdn, rtree, &rtree_ctx, 
~((uintptr_t)0), - &extent_b, extent_szind_get_maybe_invalid(&extent_b), - extent_slab_get(&extent_b)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0), true), &extent_b, - "rtree_extent_read() should return previously set value"); + &edata_b, edata_szind_get_maybe_invalid(&edata_b), + edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + ~((uintptr_t)0), true), &edata_b, + "rtree_edata_read() should return previously set value"); rtree_delete(tsdn, rtree); } @@ -126,8 +126,8 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - extent_t extent; - extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t edata; + edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -137,17 +137,17 @@ TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - &extent, SC_NSIZES, false), + &edata, SC_NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[j], true), &extent, - "rtree_extent_read() should return previously set " + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[j], true), &edata, + "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); @@ -167,8 +167,8 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - extent_t extent; - extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t edata; + edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); @@ -179,29 +179,29 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, &extent, SC_NSIZES, + rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, false); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &extent, - "rtree_extent_read() should return previously set value"); + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[i], true), &edata, + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &extent, - "rtree_extent_read() should return previously set value, " + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[i], true), &edata, + "rtree_edata_read() should return previously set value, " "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), 
- "rtree_extent_read() should return previously set value"); + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), - "rtree_extent_read() should return previously set value"); + "rtree_edata_read() should return previously set value"); } rtree_delete(tsdn, rtree); diff --git a/test/unit/slab.c b/test/unit/slab.c index bcc752e..5d2b35f 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -7,24 +7,24 @@ TEST_BEGIN(test_arena_slab_regind) { for (binind = 0; binind < SC_NBINS; binind++) { size_t regind; - extent_t slab; + edata_t slab; const bin_info_t *bin_info = &bin_infos[binind]; - extent_init(&slab, INVALID_ARENA_IND, + edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); - assert_ptr_not_null(extent_addr_get(&slab), + assert_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { - void *reg = (void *)((uintptr_t)extent_addr_get(&slab) + + void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); assert_zu_eq(arena_slab_regind(&slab, binind, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); } - free(extent_addr_get(&slab)); + free(edata_addr_get(&slab)); } } TEST_END -- cgit v0.12 From 78591841798fa548feba468d1bb7338592039180 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 11:17:19 -0800 Subject: Pull out edata_t caching into its own module. --- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 12 ++---- include/jemalloc/internal/edata_cache.h | 25 +++++++++++++ include/jemalloc/internal/extent2.h | 3 -- include/jemalloc/internal/witness.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/arena.c | 14 +++---- src/ctl.c | 2 +- src/edata_cache.c | 47 ++++++++++++++++++++++++ src/extent2.c | 51 ++++++++------------------ src/extent_dss.c | 7 ++-- 12 files changed, 106 insertions(+), 60 deletions(-) create mode 100644 include/jemalloc/internal/edata_cache.h create mode 100644 src/edata_cache.c diff --git a/Makefile.in b/Makefile.in index 86a51cc..f75ae4b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -105,6 +105,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ $(srcroot)src/edata.c \ + $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent2.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index aac620b..38c8b27 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" @@ -184,15 +185,8 @@ struct arena_s { pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; - /* - * Available edata structures that were allocated via - * base_alloc_edata(). - * - * Synchronization: edata_avail_mtx. 
- */ - edata_tree_t edata_avail; - atomic_zu_t edata_avail_cnt; - malloc_mutex_t edata_avail_mtx; + /* The source of edata_t objects. */ + edata_cache_t edata_cache; /* * bins is used to store heaps of free regions. diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h new file mode 100644 index 0000000..fc18408 --- /dev/null +++ b/include/jemalloc/internal/edata_cache.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H +#define JEMALLOC_INTERNAL_EDATA_CACHE_H + +/* + * A cache of edata_t structures allocated via base_alloc_edata (as opposed to + * the underlying extents they describe). The contents of returned edata_t + * objects are garbage and cannot be relied upon. + */ + +typedef struct edata_cache_s edata_cache_t; +struct edata_cache_s { + edata_tree_t avail; + atomic_zu_t count; + malloc_mutex_t mtx; +}; + +bool edata_cache_init(edata_cache_t *edata_cache); +edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, + base_t *base); +void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); +void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); +void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); +void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); + +#endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index ef23267..629474e 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,9 +26,6 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -edata_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata); - edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index ddbcf9d..985e0a3 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,7 +43,7 @@ #define WITNESS_RANK_TCACHE_QL 13U #define WITNESS_RANK_EXTENT_GROW 14U #define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EDATA_AVAIL 16U +#define WITNESS_RANK_EDATA_CACHE 16U #define WITNESS_RANK_EXTENT_POOL 17U #define WITNESS_RANK_RTREE 18U diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9dfc36d..23312d3 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -45,6 +45,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 0ec4d1e..76c16c5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -45,6 +45,7 @@ + diff --git a/src/arena.c b/src/arena.c index f05a1d1..a23419a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -103,7 +103,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, eset_npages_get(&arena->eset_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->edata_avail_cnt, ATOMIC_RELAXED), + atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -224,7 +224,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena 
mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(edata_avail_mtx, + READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -2053,9 +2053,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - edata_avail_new(&arena->edata_avail); - if (malloc_mutex_init(&arena->edata_avail_mtx, "edata_avail", - WITNESS_RANK_EDATA_AVAIL, malloc_mutex_rank_exclusive)) { + if (edata_cache_init(&arena->edata_cache)) { goto label_error; } @@ -2201,7 +2199,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->edata_avail_mtx); + edata_cache_prefork(tsdn, &arena->edata_cache); } void @@ -2235,7 +2233,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->edata_avail_mtx); + edata_cache_postfork_parent(tsdn, &arena->edata_cache); eset_postfork_parent(tsdn, &arena->eset_dirty); eset_postfork_parent(tsdn, &arena->eset_muzzy); eset_postfork_parent(tsdn, &arena->eset_retained); @@ -2281,7 +2279,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->edata_avail_mtx); + edata_cache_postfork_child(tsdn, &arena->edata_cache); eset_postfork_child(tsdn, &arena->eset_dirty); eset_postfork_child(tsdn, &arena->eset_muzzy); eset_postfork_child(tsdn, &arena->eset_retained); diff --git a/src/ctl.c b/src/ctl.c index 1e72bf4..a58b22b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3010,7 +3010,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->edata_avail_mtx); + MUTEX_PROF_RESET(arena->edata_cache.mtx); MUTEX_PROF_RESET(arena->eset_dirty.mtx); MUTEX_PROF_RESET(arena->eset_muzzy.mtx); MUTEX_PROF_RESET(arena->eset_retained.mtx); diff --git a/src/edata_cache.c b/src/edata_cache.c new file mode 100644 index 0000000..4d02602 --- /dev/null +++ b/src/edata_cache.c @@ -0,0 +1,47 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +edata_cache_init(edata_cache_t *edata_cache) { + if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", + WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { + return true; + } + + edata_avail_new(&edata_cache->avail); + return false; +} + +edata_t * +edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, base_t *base) { + malloc_mutex_lock(tsdn, &edata_cache->mtx); + edata_t *edata = edata_avail_first(&edata_cache->avail); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &edata_cache->mtx); + return base_alloc_edata(tsdn, base); + } + edata_avail_remove(&edata_cache->avail, edata); + atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &edata_cache->mtx); + return edata; +} + +void +edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { + malloc_mutex_lock(tsdn, &edata_cache->mtx); + edata_avail_insert(&edata_cache->avail, edata); + atomic_fetch_add_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &edata_cache->mtx); +} + +void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { + 
malloc_mutex_prefork(tsdn, &edata_cache->mtx); +} + +void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { + malloc_mutex_postfork_parent(tsdn, &edata_cache->mtx); +} + +void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { + malloc_mutex_postfork_child(tsdn, &edata_cache->mtx); +} diff --git a/src/extent2.c b/src/extent2.c index 5bacb8f..b77e4b8 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -163,28 +163,6 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } -edata_t * -extent_alloc(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); - edata_t *edata = edata_avail_first(&arena->edata_avail); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); - return base_alloc_edata(tsdn, arena->base); - } - edata_avail_remove(&arena->edata_avail, edata); - atomic_fetch_sub_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); - return edata; -} - -void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); - edata_avail_insert(&arena->edata_avail, edata); - atomic_fetch_add_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { @@ -317,7 +295,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, edata_size_get(edata), growing_retained); } } - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } static void @@ -858,7 +836,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } - edata_t *edata = extent_alloc(tsdn, arena); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (edata == NULL) { goto label_err; } @@ -872,12 +851,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } if (extent_register_no_gdump_add(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } @@ -1021,7 +1000,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; - edata_t *edata = extent_alloc(tsdn, arena); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (edata == NULL) { return NULL; } @@ -1029,7 +1009,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); if (addr == NULL) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; } edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, @@ -1039,7 +1019,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_randomize(tsdn, arena, edata, alignment); } if (extent_register(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, 
&arena->edata_cache, edata); return NULL; } @@ -1257,7 +1237,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, arena, ehooks, edata); @@ -1287,7 +1267,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, arena_ind_get(arena)); if (!err) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } return err; @@ -1359,7 +1339,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } static bool @@ -1445,7 +1425,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = extent_alloc(tsdn, arena); + edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (trail == NULL) { goto label_error_a; } @@ -1505,7 +1486,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, label_error_c: extent_unlock_edata2(tsdn, edata, trail); label_error_b: - extent_dalloc(tsdn, arena, trail); + edata_cache_put(tsdn, &arena->edata_cache, trail); label_error_a: return NULL; } @@ -1611,7 +1592,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, * arena (i.e. this one). */ assert(edata_arena_ind_get(b) == arena_ind_get(arena)); - extent_dalloc(tsdn, arena, b); + edata_cache_put(tsdn, &arena->edata_cache, b); return false; } diff --git a/src/extent_dss.c b/src/extent_dss.c index a66afb6..25ba944 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = extent_alloc(tsdn, arena); + gap = edata_cache_get(tsdn, &arena->edata_cache, arena->base); if (gap == NULL) { return NULL; } @@ -188,7 +188,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { extent_dalloc_gap(tsdn, arena, gap); } else { - extent_dalloc(tsdn, arena, gap); + edata_cache_put(tsdn, + &arena->edata_cache, gap); } if (!*commit) { *commit = pages_decommit(ret, size); @@ -224,7 +225,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } label_oom: extent_dss_extending_finish(); - extent_dalloc(tsdn, arena, gap); + edata_cache_put(tsdn, &arena->edata_cache, gap); return NULL; } -- cgit v0.12 From 09475bf8acfef36924df787deb0247a7b0456c66 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 13:35:43 -0800 Subject: extent_may_dalloc -> ehooks_dalloc_will_fail --- include/jemalloc/internal/ehooks.h | 10 ++++++++++ src/extent2.c | 8 +------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c046cd1..711a534 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -9,6 +9,7 @@ */ #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_mmap.h" extern const extent_hooks_t ehooks_default_extent_hooks; @@ -97,6 +98,15 @@ ehooks_are_default(ehooks_t *ehooks) { * include some checks for such cases. 
*/ static inline bool +ehooks_dalloc_will_fail(ehooks_t *ehooks) { + if (ehooks_are_default(ehooks)) { + return opt_retain; + } else { + return ehooks_get_extent_hooks_ptr(ehooks)->dalloc == NULL; + } +} + +static inline bool ehooks_split_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; } diff --git a/src/extent2.c b/src/extent2.c index b77e4b8..148c328 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1244,12 +1244,6 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { } static bool -extent_may_dalloc(void) { - /* With retain enabled, the default dalloc always fails. */ - return !opt_retain; -} - -static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata) { bool err; @@ -1281,7 +1275,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { + if (!ehooks_dalloc_will_fail(ehooks)) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. -- cgit v0.12 From 07045162459f1d5f529ca530f035157f97645b0d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 17:23:24 -0800 Subject: Ehooks: Add head tracking. --- include/jemalloc/internal/edata.h | 12 +------- include/jemalloc/internal/ehooks.h | 20 +++++++++---- include/jemalloc/internal/extent2.h | 1 - src/ehooks.c | 60 ++++++++++++++++++++++++++++++------- src/extent2.c | 41 ++----------------------- 5 files changed, 68 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 990c325..86f5ac5 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -467,20 +467,12 @@ edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { static inline bool edata_is_head_get(edata_t *edata) { - if (maps_coalesce) { - not_reached(); - } - return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> EDATA_BITS_IS_HEAD_SHIFT); } static inline void edata_is_head_set(edata_t *edata, bool is_head) { - if (maps_coalesce) { - not_reached(); - } - edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } @@ -502,9 +494,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_committed_set(edata, committed); edata_dumpable_set(edata, dumpable); ql_elm_new(edata, ql_link); - if (!maps_coalesce) { - edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); - } + edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); } diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 711a534..6f4f950 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,16 +1,21 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_mmap.h" + /* * This module is the internal interface to the extent hooks (both * user-specified and external). Eventually, this will give us the flexibility * to use multiple different versions of user-visible extent-hook APIs under a * single user interface. + * + * Current API expansions (not available to anyone but the default hooks yet): + * - Head state tracking. 
Hooks can decide whether or not to merge two + * extents based on whether or not one of them is the head (i.e. was + * allocated on its own). The later extent loses its "head" status. */ -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/extent_mmap.h" - extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; @@ -43,7 +48,8 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); -bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, + void *addr_b, bool head_b); void ehooks_default_zero_impl(void *addr, size_t size); /* @@ -314,10 +320,12 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed, + unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_merge_impl(addr_a, addr_b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, + head_b); } else if (extent_hooks->merge == NULL) { return true; } else { diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 629474e..0844336 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -54,7 +54,6 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, edata_t *b); -bool extent_head_no_merge(edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/ehooks.c b/src/ehooks.c index a62586b..51b1514 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -183,8 +183,51 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_split_impl(); } +static inline bool +ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { + edata_t *a = iealloc(tsdn, addr_a); + edata_t *b = iealloc(tsdn, addr_b); + return edata_sn_comp(a, b) == 0; +} + +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +static bool +ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, + bool head_b) { + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). + */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (head_b) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. 
+ */ + assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); + return true; + } + assert(ehooks_same_sn(tsdn, addr_a, addr_b)); + + return false; +} + bool -ehooks_default_merge_impl(void *addr_a, void *addr_b) { +ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, + bool head_b) { + if (ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, head_b)) { + return true; + } if (!maps_coalesce && !opt_retain) { return true; } @@ -198,15 +241,12 @@ ehooks_default_merge_impl(void *addr_a, void *addr_b) { static bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = iealloc(tsdn, addr_a); - edata_t *b = iealloc(tsdn, addr_b); - if (extent_head_no_merge(a, b)) { - return true; - } - } - return ehooks_default_merge_impl(addr_a, addr_b); + tsdn_t *tsdn = tsdn_fetch(); + edata_t *a = iealloc(tsdn, addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = iealloc(tsdn, addr_b); + bool head_b = edata_is_head_get(b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } void diff --git a/src/extent2.c b/src/extent2.c index 148c328..21f9cdb 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1493,38 +1493,6 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, slab_a, size_b, szind_b, slab_b, false); } -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -bool -extent_head_no_merge(edata_t *a, edata_t *b) { - assert(edata_base_get(a) < edata_base_get(b)); - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). - */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } - /* If b is a head extent, disallow the cross-region merge. */ - if (edata_is_head_get(b)) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. - */ - assert(edata_sn_comp(a, b) != 0); - return true; - } - assert(edata_sn_comp(a, b) == 0); - - return false; -} - static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained) { @@ -1532,13 +1500,10 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); - if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { - return true; - } - bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), - edata_size_get(a), edata_base_get(b), edata_size_get(b), - edata_committed_get(a), arena_ind_get(arena)); + edata_size_get(a), edata_is_head_get(a), edata_base_get(b), + edata_size_get(b), edata_is_head_get(b), edata_committed_get(a), + arena_ind_get(arena)); if (err) { return true; -- cgit v0.12 From bb70df8e5babcf2779230d40b6a34fb04187c818 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:25:24 -0800 Subject: Extent refactor: Introduce ecache module. This will eventually completely wrap the eset, and handle concurrency, allocation, and deallocation. For now, we only pull out the mutex from the eset. 
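For illustration only (not part of this patch), a minimal sketch of the intended layering, using the names introduced in this change; ecache_fit_sketch is a hypothetical helper, not a function this commit adds:

/*
 * Sketch: the ecache owns the mutex; the eset underneath it is plain data
 * and assumes external synchronization.
 */
static edata_t *
ecache_fit_sketch(tsdn_t *tsdn, ecache_t *ecache, size_t esize,
    size_t alignment) {
	malloc_mutex_lock(tsdn, &ecache->mtx);
	/* eset_fit() no longer takes tsdn or locks; the caller holds mtx. */
	edata_t *edata = eset_fit(&ecache->eset, esize, alignment);
	if (edata != NULL) {
		eset_remove(&ecache->eset, edata);
	}
	malloc_mutex_unlock(tsdn, &ecache->mtx);
	return edata;
}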
--- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 28 +-- include/jemalloc/internal/ecache.h | 59 +++++++ include/jemalloc/internal/eset.h | 47 ++--- include/jemalloc/internal/extent2.h | 8 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/arena.c | 156 +++++++++-------- src/background_thread.c | 12 +- src/ctl.c | 6 +- src/ecache.c | 54 ++++++ src/eset.c | 44 ++--- src/extent2.c | 226 +++++++++++++------------ src/large.c | 4 +- test/unit/retained.c | 2 +- 15 files changed, 354 insertions(+), 295 deletions(-) create mode 100644 include/jemalloc/internal/ecache.h create mode 100644 src/ecache.c diff --git a/Makefile.in b/Makefile.in index f75ae4b..7145848 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/ecache.c \ $(srcroot)src/edata.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 38c8b27..48d13b8 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,8 +5,8 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" -#include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -53,7 +53,7 @@ struct arena_decay_s { /* * Number of unpurged pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay_*.nunpurged and - * eset_npages_get(&arena->extents_*) to determine how many dirty pages, + * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, * if any, were generated. */ size_t nunpurged; @@ -155,9 +155,9 @@ struct arena_s { * * Synchronization: internal. */ - eset_t eset_dirty; - eset_t eset_muzzy; - eset_t eset_retained; + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; /* * Decay-based purging state, responsible for scheduling extent state @@ -168,22 +168,8 @@ struct arena_s { arena_decay_t decay_dirty; /* dirty --> muzzy */ arena_decay_t decay_muzzy; /* muzzy --> retained */ - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. - * - * Synchronization: extent_grow_mtx - */ - pszind_t extent_grow_next; - pszind_t retain_grow_limit; - malloc_mutex_t extent_grow_mtx; + /* The grow info for the retained ecache. */ + ecache_grow_t ecache_grow; /* The source of edata_t objects. 
*/ edata_cache_t edata_cache; diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h new file mode 100644 index 0000000..7085720 --- /dev/null +++ b/include/jemalloc/internal/ecache.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_ECACHE_H +#define JEMALLOC_INTERNAL_ECACHE_H + +#include "jemalloc/internal/eset.h" +#include "jemalloc/internal/mutex.h" + +typedef struct ecache_s ecache_t; +struct ecache_s { + malloc_mutex_t mtx; + eset_t eset; +}; + +typedef struct ecache_grow_s ecache_grow_t; +struct ecache_grow_s { + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * + * Synchronization: extent_grow_mtx + */ + pszind_t next; + pszind_t limit; + malloc_mutex_t mtx; +}; + +static inline size_t +ecache_npages_get(ecache_t *ecache) { + return eset_npages_get(&ecache->eset); +} +/* Get the number of extents in the given page size index. */ +static inline size_t +ecache_nextents_get(ecache_t *ecache, pszind_t ind) { + return eset_nextents_get(&ecache->eset, ind); +} +/* Get the sum total bytes of the extents in the given page size index. */ +static inline size_t +ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { + return eset_nbytes_get(&ecache->eset, ind); +} + +bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, + bool delay_coalesce); +void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); +void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); +void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); + +bool ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow); + +#endif /* JEMALLOC_INTERNAL_ECACHE_H */ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index e76257a..bbc6b5c 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -9,42 +9,25 @@ /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. + * + * This class is not thread-safe; synchronization must be done externally if + * there are mutating operations. One exception is the stats counters, which + * may be read without any locking. */ typedef struct eset_s eset_t; struct eset_s { - malloc_mutex_t mtx; - - /* - * Quantized per size class heaps of extents. - * - * Synchronization: mtx. - */ + /* Quantized per size class heaps of extents. */ edata_heap_t heaps[SC_NPSIZES + 1]; atomic_zu_t nextents[SC_NPSIZES + 1]; atomic_zu_t nbytes[SC_NPSIZES + 1]; - /* - * Bitmap for which set bits correspond to non-empty heaps. - * - * Synchronization: mtx. - */ + /* Bitmap for which set bits correspond to non-empty heaps. */ bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; - /* - * LRU of all extents in heaps. - * - * Synchronization: mtx. - */ + /* LRU of all extents in heaps. */ edata_list_t lru; - /* - * Page sum for all extents in heaps. - * - * The synchronization here is a little tricky. 
Modifications to npages - * must hold mtx, but reads need not (though, a reader who sees npages - * without holding the mutex can't assume anything about the rest of the - * state of the eset_t). - */ + /* Page sum for all extents in heaps. */ atomic_zu_t npages; /* All stored extents must be in the same state. */ @@ -57,8 +40,7 @@ struct eset_s { bool delay_coalesce; }; -bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce); +void eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce); extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); @@ -67,17 +49,12 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. */ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); -void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); -void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); +void eset_insert(eset_t *eset, edata_t *edata); +void eset_remove(eset_t *eset, edata_t *edata); /* * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. */ -edata_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, - size_t alignment); - -void eset_prefork(tsdn_t *tsdn, eset_t *eset); -void eset_postfork_parent(tsdn_t *tsdn, eset_t *eset); -void eset_postfork_child(tsdn_t *tsdn, eset_t *eset); +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 0844336..80e789e 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -1,8 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT2_H #define JEMALLOC_INTERNAL_EXTENT2_H +#include "jemalloc/internal/ecache.h" #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/eset.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" @@ -27,12 +27,12 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, edata_t *edata); + ecache_t *ecache, edata_t *edata); edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, size_t npages_min); + ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 23312d3..7b2e84a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 76c16c5..338962b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/arena.c b/src/arena.c index a23419a..2652207 100644 --- a/src/arena.c +++ b/src/arena.c @@ -56,7 +56,7 @@ static unsigned huge_arena_ind; */ 
static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, eset_t *eset, bool all, size_t npages_limit, + arena_decay_t *decay, ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); @@ -76,8 +76,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += eset_npages_get(&arena->eset_dirty); - *nmuzzy += eset_npages_get(&arena->eset_muzzy); + *ndirty += ecache_npages_get(&arena->ecache_dirty); + *nmuzzy += ecache_npages_get(&arena->ecache_muzzy); } void @@ -100,7 +100,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - eset_npages_get(&arena->eset_retained) << LG_PAGE); + ecache_npages_get(&arena->ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), @@ -131,8 +131,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - eset_npages_get(&arena->eset_dirty) + - eset_npages_get(&arena->eset_muzzy)) << LG_PAGE))); + ecache_npages_get(&arena->ecache_dirty) + + ecache_npages_get(&arena->ecache_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -174,12 +174,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = eset_nextents_get(&arena->eset_dirty, i); - muzzy = eset_nextents_get(&arena->eset_muzzy, i); - retained = eset_nextents_get(&arena->eset_retained, i); - dirty_bytes = eset_nbytes_get(&arena->eset_dirty, i); - muzzy_bytes = eset_nbytes_get(&arena->eset_muzzy, i); - retained_bytes = eset_nbytes_get(&arena->eset_retained, i); + dirty = ecache_nextents_get(&arena->ecache_dirty, i); + muzzy = ecache_nextents_get(&arena->ecache_muzzy, i); + retained = ecache_nextents_get(&arena->ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&arena->ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&arena->ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&arena->ecache_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -226,11 +226,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(eset_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(eset_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_retained.mtx, arena_prof_mutex_extents_retained) READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -258,7 
+258,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, edata); + extents_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,10 +434,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - edata_t *edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + edata_t *edata = extents_alloc(tsdn, arena, ehooks, + &arena->ecache_dirty, NULL, usize, sz_large_pad, alignment, false, + szind, zero, &commit); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + edata = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } @@ -606,10 +607,10 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, size_t current_npages, size_t npages_limit, + ecache_t *ecache, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, eset, false, + arena_decay_to_limit(tsdn, arena, decay, ecache, false, npages_limit, current_npages - npages_limit, is_background_thread); } @@ -641,8 +642,8 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, const nstime_t *time, bool is_background_thread) { - size_t current_npages = eset_npages_get(eset); + ecache_t *ecache, const nstime_t *time, bool is_background_thread) { + size_t current_npages = ecache_npages_get(ecache); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -651,7 +652,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, eset, + arena_decay_try_purge(tsdn, arena, decay, ecache, current_npages, npages_limit, is_background_thread); } } @@ -708,15 +709,15 @@ arena_decay_ms_valid(ssize_t decay_ms) { static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool is_background_thread) { + ecache_t *ecache, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, eset, false, - 0, eset_npages_get(eset), + arena_decay_to_limit(tsdn, arena, decay, ecache, false, + 0, ecache_npages_get(ecache), is_background_thread); } return false; @@ -751,11 +752,11 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, eset, &time, + arena_decay_epoch_advance(tsdn, arena, decay, ecache, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, eset, - eset_npages_get(eset), + arena_decay_try_purge(tsdn, arena, decay, ecache, + ecache_npages_get(ecache), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -780,7 +781,7 @@ arena_muzzy_decay_ms_get(arena_t *arena) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, ssize_t decay_ms) { + ecache_t *ecache, ssize_t decay_ms) { if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -795,7 +796,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. */ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, eset, false); + arena_maybe_decay(tsdn, arena, decay, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -805,19 +806,19 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->eset_dirty, decay_ms); + &arena->ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->eset_muzzy, decay_ms); + &arena->ecache_muzzy, decay_ms); } static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, eset_t *eset, size_t npages_limit, + ehooks_t *ehooks, ecache_t *ecache, size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -826,7 +827,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + (edata = extents_evict(tsdn, arena, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += edata_size_get(edata) >> LG_PAGE; @@ -836,8 +837,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, eset_t *eset, bool all, edata_list_t *decay_extents, - bool is_background_thread) { + arena_decay_t *decay, ecache_t *ecache, bool all, + edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -856,7 +857,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; edata_list_remove(decay_extents, edata); - switch (eset_state_get(eset)) { + switch (eset_state_get(&ecache->eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -864,7 +865,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, 
edata_size_get(edata))) { extents_dalloc(tsdn, arena, ehooks, - &arena->eset_muzzy, edata); + &arena->ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -900,14 +901,14 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (eset_npages_get(extents) >= npages_limit). We need an upper + * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper * bound on number of pages in order to prevent unbounded growth (namely in * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool all, size_t npages_limit, size_t npages_decay_max, + ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -924,11 +925,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - eset, all, &decay_extents, is_background_thread); + ecache, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -938,11 +939,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool is_background_thread, bool all) { + ecache_t *ecache, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, - eset_npages_get(eset), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, ecache, all, 0, + ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -953,7 +954,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, eset, + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, ecache, is_background_thread); size_t npages_new; if (epoch_advanced) { @@ -975,18 +976,18 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->eset_dirty, is_background_thread, all); + &arena->ecache_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - if (eset_npages_get(&arena->eset_muzzy) == 0 && + if (ecache_npages_get(&arena->ecache_muzzy) == 0 && arena_muzzy_decay_ms_get(arena) <= 0) { return false; } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->eset_muzzy, is_background_thread, all); + &arena->ecache_muzzy, is_background_thread, all); } void @@ -1157,7 +1158,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; while ((edata = extents_evict(tsdn, arena, ehooks, - &arena->eset_retained, 0)) != NULL) { + &arena->ecache_retained, 0)) != NULL) { 
extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1173,8 +1174,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(eset_npages_get(&arena->eset_dirty) == 0); - assert(eset_npages_get(&arena->eset_muzzy) == 0); + assert(ecache_npages_get(&arena->ecache_dirty) == 0); + assert(ecache_npages_get(&arena->ecache_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1230,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } @@ -1917,14 +1918,14 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, } } - malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->retain_grow_limit); + *old_limit = sz_pind2sz(arena->ecache_grow.limit); } if (new_limit != NULL) { - arena->retain_grow_limit = new_ind; + arena->ecache_grow.limit = new_ind; } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); return false; } @@ -2016,14 +2017,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (eset_init(tsdn, &arena->eset_dirty, extent_state_dirty, true)) { + if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (eset_init(tsdn, &arena->eset_muzzy, extent_state_muzzy, false)) { + if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2032,7 +2033,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (eset_init(tsdn, &arena->eset_retained, extent_state_retained, + if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, false)) { goto label_error; } @@ -2046,10 +2047,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - arena->extent_grow_next = sz_psz2ind(HUGEPAGE); - arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + if (ecache_grow_init(tsdn, &arena->ecache_grow)) { goto label_error; } @@ -2187,14 +2185,14 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx); + ecache_grow_prefork(tsdn, &arena->ecache_grow); } void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - eset_prefork(tsdn, &arena->eset_dirty); - eset_prefork(tsdn, &arena->eset_muzzy); - eset_prefork(tsdn, &arena->eset_retained); + ecache_prefork(tsdn, &arena->ecache_dirty); + ecache_prefork(tsdn, &arena->ecache_muzzy); + ecache_prefork(tsdn, &arena->ecache_retained); } void @@ -2234,10 +2232,10 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); edata_cache_postfork_parent(tsdn, &arena->edata_cache); - eset_postfork_parent(tsdn, &arena->eset_dirty); - eset_postfork_parent(tsdn, &arena->eset_muzzy); - eset_postfork_parent(tsdn, &arena->eset_retained); - malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); + ecache_postfork_parent(tsdn, &arena->ecache_dirty); + ecache_postfork_parent(tsdn, &arena->ecache_muzzy); + ecache_postfork_parent(tsdn, &arena->ecache_retained); + ecache_grow_postfork_parent(tsdn, &arena->ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { @@ -2280,10 +2278,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); edata_cache_postfork_child(tsdn, &arena->edata_cache); - eset_postfork_child(tsdn, &arena->eset_dirty); - eset_postfork_child(tsdn, &arena->eset_muzzy); - eset_postfork_child(tsdn, &arena->eset_retained); - malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); + ecache_postfork_child(tsdn, &arena->ecache_dirty); + ecache_postfork_child(tsdn, &arena->ecache_muzzy); + ecache_postfork_child(tsdn, &arena->ecache_retained); + ecache_grow_postfork_child(tsdn, &arena->ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { diff --git a/src/background_thread.c b/src/background_thread.c index 400dae5..90b027e 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -114,7 +114,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { static uint64_t arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, - eset_t *eset) { + ecache_t *ecache) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. 
*/ return BACKGROUND_THREAD_MIN_INTERVAL_NS; @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = eset_npages_get(eset); + size_t npages = ecache_npages_get(ecache); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->eset_dirty); + &arena->ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->eset_muzzy); + &arena->ecache_muzzy); return i1 < i2 ? i1 : i2; } @@ -717,8 +717,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (eset_npages_get(&arena->eset_dirty) > 0 || - eset_npages_get(&arena->eset_muzzy) > 0 || + (ecache_npages_get(&arena->ecache_dirty) > 0 || + ecache_npages_get(&arena->ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index a58b22b..eee1277 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3011,9 +3011,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->edata_cache.mtx); - MUTEX_PROF_RESET(arena->eset_dirty.mtx); - MUTEX_PROF_RESET(arena->eset_muzzy.mtx); - MUTEX_PROF_RESET(arena->eset_retained.mtx); + MUTEX_PROF_RESET(arena->ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->ecache_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/ecache.c b/src/ecache.c new file mode 100644 index 0000000..7155de3 --- /dev/null +++ b/src/ecache.c @@ -0,0 +1,54 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, + bool delay_coalesce) { + if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, + malloc_mutex_rank_exclusive)) { + return true; + } + eset_init(&ecache->eset, state, delay_coalesce); + return false; +} + +void +ecache_prefork(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_prefork(tsdn, &ecache->mtx); +} + +void +ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_postfork_parent(tsdn, &ecache->mtx); +} + +void +ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_postfork_child(tsdn, &ecache->mtx); +} + +bool +ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + ecache_grow->next = sz_psz2ind(HUGEPAGE); + ecache_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); + if (malloc_mutex_init(&ecache_grow->mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + return true; + } + return false; +} + +void +ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + malloc_mutex_prefork(tsdn, &ecache_grow->mtx); +} + +void +ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + malloc_mutex_postfork_parent(tsdn, &ecache_grow->mtx); +} + +void +ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + 
malloc_mutex_postfork_child(tsdn, &ecache_grow->mtx); +} diff --git a/src/eset.c b/src/eset.c index 88b9c8c..e71868c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -8,13 +8,9 @@ const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); -bool -eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, +void +eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce) { - if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { - return true; - } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { edata_heap_new(&eset->heaps[i]); } @@ -23,7 +19,6 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; eset->delay_coalesce = delay_coalesce; - return false; } extent_state_t @@ -63,8 +58,7 @@ eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { } void -eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); +eset_insert(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); size_t size = edata_size_get(edata); @@ -94,8 +88,7 @@ eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { } void -eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); +eset_remove(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); size_t size = edata_size_get(edata); @@ -114,9 +107,13 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { edata_list_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* - * As in eset_insert_locked, we hold eset->mtx and so don't need atomic + * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. */ + /* + * This class is not thread-safe in general; we rely on external + * synchronization for all mutating operations. + */ size_t cur_extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); @@ -166,7 +163,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * large enough. */ static edata_t * -eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { +eset_first_fit(eset_t *eset, size_t size) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -211,16 +208,14 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { } edata_t * -eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - +eset_fit(eset_t *eset, size_t esize, size_t alignment) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit_locked(tsdn, eset, max_size); + edata_t *edata = eset_first_fit(eset, max_size); if (alignment > PAGE && edata == NULL) { /* @@ -233,18 +228,3 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return edata; } - -void -eset_prefork(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_prefork(tsdn, &eset->mtx); -} - -void -eset_postfork_parent(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_parent(tsdn, &eset->mtx); -} - -void -eset_postfork_child(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_child(tsdn, &eset->mtx); -} diff --git a/src/extent2.c b/src/extent2.c index 21f9cdb..3b47e07 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -45,13 +45,13 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, edata_t *edata, bool growing_retained); + ecache_t *ecache, edata_t *edata, bool growing_retained); /******************************************************************************/ @@ -165,22 +165,22 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, &coalesced, false); - edata_state_set(edata, eset_state_get(eset)); + edata_state_set(edata, eset_state_get(&ecache->eset)); if (!coalesced) { return true; } - eset_insert_locked(tsdn, eset, edata); + eset_insert(&ecache->eset, edata); return false; } edata_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -188,14 +188,14 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -206,16 +206,16 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, 
eset_t *eset, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, eset, edata, false); + extent_record(tsdn, arena, ehooks, ecache, edata, false); } edata_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); /* * Get the LRU coalesced extent, if any. If coalescing was delayed, @@ -224,24 +224,23 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - edata = edata_list_first(&eset->lru); + edata = edata_list_first(&ecache->eset.lru); if (edata == NULL) { goto label_return; } /* Check the eviction limit. */ - size_t extents_npages = atomic_load_zu(&eset->npages, - ATOMIC_RELAXED); + size_t extents_npages = ecache_npages_get(ecache); if (extents_npages <= npages_min) { edata = NULL; goto label_return; } - eset_remove_locked(tsdn, eset, edata); - if (!eset->delay_coalesce) { + eset_remove(&ecache->eset, edata); + if (!ecache->eset.delay_coalesce) { break; } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, edata)) { + ecache, edata)) { break; } /* @@ -254,7 +253,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (eset_state_get(eset)) { + switch (eset_state_get(&ecache->eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -269,7 +268,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, } label_return: - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -278,8 +277,8 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - edata_t *edata, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -288,7 +287,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. 
*/ - if (eset_state_get(eset) == extent_state_dirty) { + if (eset_state_get(&ecache->eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, @@ -299,30 +298,30 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, edata); + edata_state_set(edata, eset_state_get(&ecache->eset)); + eset_insert(&ecache->eset, edata); } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_deactivate(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { - malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); + extent_deactivate_locked(tsdn, arena, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); - assert(edata_state_get(edata) == eset_state_get(eset)); + assert(edata_state_get(edata) == eset_state_get(&ecache->eset)); - eset_remove_locked(tsdn, eset, edata); + eset_remove(&ecache->eset, edata); edata_state_set(edata, extent_state_active); } @@ -515,12 +514,12 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { } /* - * Tries to find and remove an extent from eset that can be used for the + * Tries to find and remove an extent from ecache that can be used for the * given allocation request. */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); @@ -543,7 +542,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } size_t esize = size + pad; - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, @@ -557,21 +556,22 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(edata_base_get(edata) == new_addr); if (edata_arena_ind_get(edata) != arena_ind_get(arena) || edata_size_get(edata) < esize - || edata_state_get(edata) != eset_state_get(eset)) { + || edata_state_get(edata) + != eset_state_get(&ecache->eset)) { edata = NULL; } extent_unlock_edata(tsdn, unlock_edata); } } else { - edata = eset_fit_locked(tsdn, eset, esize, alignment); + edata = eset_fit(&ecache->eset, esize, alignment); } if (edata == NULL) { - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } - extent_activate_locked(tsdn, arena, eset, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + extent_activate_locked(tsdn, arena, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -580,7 +580,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Given an allocation request and an extent guaranteed to be able to satisfy * it, this splits off lead and trail extents, leaving edata pointing to an * extent satisfying the allocation. - * This function doesn't put lead or trail into any eset_t; it's the caller's + * This function doesn't put lead or trail into any ecache; it's the caller's * job to ensure that they can be reused. */ typedef enum { @@ -676,11 +676,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * This fulfills the indicated allocation request out of the given extent (which * the caller should have ensured was big enough). If there's any unused space * before or after the resulting allocation, that space is given its own extent - * and put back into eset. + * and put back into ecache. */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -697,19 +697,19 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, && !opt_retain) { /* * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the eset. + * leaking the extent. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, eset, to_leak); + extent_deactivate(tsdn, arena, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, eset, lead); + extent_deactivate(tsdn, arena, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, eset, trail); + extent_deactivate(tsdn, arena, ecache, trail); } return edata; } else { @@ -724,7 +724,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (to_leak != NULL) { void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -736,10 +736,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* * Tries to satisfy the given allocation request by reusing one of the extents - * in the given eset_t. + * in the given ecache_t. */ static edata_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -752,13 +752,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, eset, new_addr, size, pad, alignment, slab, + rtree_ctx, ecache, new_addr, size, pad, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, new_addr, size, pad, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { @@ -768,7 +768,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, edata, + extent_record(tsdn, arena, ehooks, ecache, edata, growing_retained); return NULL; } @@ -810,7 +810,7 @@ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); + malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -825,15 +825,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->extent_grow_next + egn_skip >= + if (arena->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. 
*/ goto label_err; } - alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, @@ -881,11 +881,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, lead, true); + &arena->ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, trail, true); + &arena->ecache_retained, trail, true); } } else { /* @@ -898,12 +898,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, ehooks, - &arena->eset_retained, to_salvage, true); + &arena->ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, - &arena->eset_retained, to_leak, true); + &arena->ecache_retained, to_leak, true); } goto label_err; } @@ -912,7 +912,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, edata, true); + &arena->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -930,14 +930,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->extent_grow_next + egn_skip + 1 <= - arena->retain_grow_limit) { - arena->extent_grow_next += egn_skip + 1; + if (arena->ecache_grow.next + egn_skip + 1 <= + arena->ecache_grow.limit) { + arena->ecache_grow.next += egn_skip + 1; } else { - arena->extent_grow_next = arena->retain_grow_limit; + arena->ecache_grow.next = arena->ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -962,7 +962,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); return NULL; } @@ -973,13 +973,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->eset_retained, new_addr, size, pad, alignment, slab, + &arena->ecache_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -988,9 +988,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); return edata; } @@ -1054,7 +1054,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, +extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, const edata_t *outer) { assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { @@ -1062,7 +1062,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, } assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != eset->state) { + if (edata_state_get(outer) != ecache->eset.state) { return false; } @@ -1074,19 +1074,20 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, eset, inner, outer)); +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, + bool growing_retained) { + assert(extent_can_coalesce(arena, ecache, inner, outer)); - extent_activate_locked(tsdn, arena, eset, outer); + extent_activate_locked(tsdn, arena, ecache, outer); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, arena, ehooks, forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, eset, outer); + extent_deactivate_locked(tsdn, arena, ecache, outer); } return err; @@ -1094,7 +1095,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -1114,19 +1115,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_past_get(edata), inactive_only); if (next != NULL) { /* - * eset->mtx only protects against races for - * like-state eset, so call extent_can_coalesce() + * ecache->mtx only protects against races for + * like-state extents, so call extent_can_coalesce() * before releasing next's pool lock. */ - bool can_coalesce = extent_can_coalesce(arena, eset, + bool can_coalesce = extent_can_coalesce(arena, ecache, edata, next); extent_unlock_edata(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, edata, next, true, + ehooks, ecache, edata, next, true, growing_retained)) { - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; return edata; @@ -1139,15 +1140,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, eset, + bool can_coalesce = extent_can_coalesce(arena, ecache, edata, prev); extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, edata, prev, false, + ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return edata; @@ -1157,7 +1158,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } } while (again); - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { *coalesced = false; } return edata; @@ -1165,35 +1166,35 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, edata, coalesced, growing_retained, false); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, edata, coalesced, growing_retained, true); } /* * Does the metadata management portions of putting an unused extent into the - * given eset_t (coalesces, deregisters slab interiors, the heap operations). + * given ecache_t (coalesces, deregisters slab interiors, the heap operations). */ static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((eset_state_get(eset) != extent_state_dirty && - eset_state_get(eset) != extent_state_muzzy) || + assert((eset_state_get(&ecache->eset) != extent_state_dirty && + eset_state_get(&ecache->eset) != extent_state_muzzy) || !edata_zeroed_get(edata)); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { @@ -1204,29 +1205,29 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); - if (!eset->delay_coalesce) { + if (!ecache->eset.delay_coalesce) { edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, edata, NULL, growing_retained); + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->eset_dirty); - /* Always coalesce large eset eagerly. */ + assert(ecache == &arena->ecache_dirty); + /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, edata, &coalesced, + rtree_ctx, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. */ - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); arena_decay_extent(tsdn, arena, ehooks, edata); return; } } - extent_deactivate_locked(tsdn, arena, eset, edata); + extent_deactivate_locked(tsdn, arena, ecache, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); } void @@ -1312,7 +1313,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->eset_retained, edata, false); + extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, + false); } void diff --git a/src/large.c b/src/large.c index 67b4745..b8b70f4 100644 --- a/src/large.c +++ b/src/large.c @@ -149,10 +149,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool commit = true; edata_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + || (trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { diff --git a/test/unit/retained.c b/test/unit/retained.c index 7993fd3..e6b6f7b 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->extent_grow_next; pind++) { + arena->ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; -- cgit v0.12 From 98eb40e563bd2c42bfd5d7275584a4aa69a2b3b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:33:19 -0800 Subject: Move delay_coalesce from the eset to the ecache. --- include/jemalloc/internal/ecache.h | 5 +++++ include/jemalloc/internal/eset.h | 11 +++-------- src/ecache.c | 3 ++- src/eset.c | 12 +++++------- src/extent2.c | 13 +++++++------ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 7085720..05388b6 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -8,6 +8,11 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. + */ + bool delay_coalesce; }; typedef struct ecache_grow_s ecache_grow_t; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index bbc6b5c..7a1055c 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -32,15 +32,9 @@ struct eset_s { /* All stored extents must be in the same state. */ extent_state_t state; - - /* - * If true, delay coalescing until eviction; otherwise coalesce during - * deallocation. 
- */ - bool delay_coalesce; }; -void eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce); +void eset_init(eset_t *eset, extent_state_t state); extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); @@ -55,6 +49,7 @@ void eset_remove(eset_t *eset, edata_t *edata); * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. */ -edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment); +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, + bool delay_coalesce); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/ecache.c b/src/ecache.c index 7155de3..d5ed235 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -8,7 +8,8 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } - eset_init(&ecache->eset, state, delay_coalesce); + ecache->delay_coalesce = delay_coalesce; + eset_init(&ecache->eset, state); return false; } diff --git a/src/eset.c b/src/eset.c index e71868c..9d3deda 100644 --- a/src/eset.c +++ b/src/eset.c @@ -9,8 +9,7 @@ const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); void -eset_init(eset_t *eset, extent_state_t state, - bool delay_coalesce) { +eset_init(eset_t *eset, extent_state_t state) { for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { edata_heap_new(&eset->heaps[i]); } @@ -18,7 +17,6 @@ eset_init(eset_t *eset, extent_state_t state, edata_list_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; - eset->delay_coalesce = delay_coalesce; } extent_state_t @@ -163,7 +161,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * large enough. */ static edata_t * -eset_first_fit(eset_t *eset, size_t size) { +eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -191,7 +189,7 @@ eset_first_fit(eset_t *eset, size_t size) { * * Only do check for dirty eset (delay_coalesce). */ - if (eset->delay_coalesce && + if (delay_coalesce && (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } @@ -208,14 +206,14 @@ eset_first_fit(eset_t *eset, size_t size) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, bool delay_coalesce) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size); + edata_t *edata = eset_first_fit(eset, max_size, delay_coalesce); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent2.c b/src/extent2.c index 3b47e07..96d4a11 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -235,7 +235,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, goto label_return; } eset_remove(&ecache->eset, edata); - if (!ecache->eset.delay_coalesce) { + if (!ecache->delay_coalesce) { break; } /* Try to coalesce. 
*/ @@ -563,7 +563,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, esize, alignment); + edata = eset_fit(&ecache->eset, esize, alignment, + ecache->delay_coalesce); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); @@ -1127,7 +1128,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (can_coalesce && !extent_coalesce(tsdn, arena, ehooks, ecache, edata, next, true, growing_retained)) { - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return edata; @@ -1148,7 +1149,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return edata; @@ -1158,7 +1159,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } } while (again); - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { *coalesced = false; } return edata; @@ -1205,7 +1206,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); - if (!ecache->eset.delay_coalesce) { + if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { -- cgit v0.12 From d8b0b66c6c0818f83661f69a5eba05924efe0755 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:44:49 -0800 Subject: Put extent_state_t into ecache as well as eset. --- include/jemalloc/internal/ecache.h | 2 ++ include/jemalloc/internal/eset.h | 6 ++++-- src/arena.c | 2 +- src/ecache.c | 1 + src/eset.c | 5 ----- src/extent2.c | 18 +++++++++--------- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 05388b6..8532192 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -8,6 +8,8 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + /* All stored extents must be in the same state. */ + extent_state_t state; /* * If true, delay coalescing until eviction; otherwise coalesce during * deallocation. diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 7a1055c..5c1051a 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -30,12 +30,14 @@ struct eset_s { /* Page sum for all extents in heaps. */ atomic_zu_t npages; - /* All stored extents must be in the same state. */ + /* + * A duplication of the data in the containing ecache. We use this only + * for assertions on the states of the passed-in extents. + */ extent_state_t state; }; void eset_init(eset_t *eset, extent_state_t state); -extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. 
*/ diff --git a/src/arena.c b/src/arena.c index 2652207..5407ab9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -857,7 +857,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; edata_list_remove(decay_extents, edata); - switch (eset_state_get(&ecache->eset)) { + switch (ecache->state) { case extent_state_active: not_reached(); case extent_state_dirty: diff --git a/src/ecache.c b/src/ecache.c index d5ed235..a57a0a6 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -8,6 +8,7 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } + ecache->state = state; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); return false; diff --git a/src/eset.c b/src/eset.c index 9d3deda..16ca72d 100644 --- a/src/eset.c +++ b/src/eset.c @@ -19,11 +19,6 @@ eset_init(eset_t *eset, extent_state_t state) { eset->state = state; } -extent_state_t -eset_state_get(const eset_t *eset) { - return eset->state; -} - size_t eset_npages_get(eset_t *eset) { return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); diff --git a/src/extent2.c b/src/extent2.c index 96d4a11..96c6ca6 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -170,7 +170,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool coalesced; edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, &coalesced, false); - edata_state_set(edata, eset_state_get(&ecache->eset)); + edata_state_set(edata, ecache->state); if (!coalesced) { return true; @@ -253,7 +253,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (eset_state_get(&ecache->eset)) { + switch (ecache->state) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -287,7 +287,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. 
*/ - if (eset_state_get(&ecache->eset) == extent_state_dirty) { + if (ecache->state == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, @@ -303,7 +303,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, eset_state_get(&ecache->eset)); + edata_state_set(edata, ecache->state); eset_insert(&ecache->eset, edata); } @@ -319,7 +319,7 @@ static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); - assert(edata_state_get(edata) == eset_state_get(&ecache->eset)); + assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); edata_state_set(edata, extent_state_active); @@ -557,7 +557,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (edata_arena_ind_get(edata) != arena_ind_get(arena) || edata_size_get(edata) < esize || edata_state_get(edata) - != eset_state_get(&ecache->eset)) { + != ecache->state) { edata = NULL; } extent_unlock_edata(tsdn, unlock_edata); @@ -1063,7 +1063,7 @@ extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, } assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != ecache->eset.state) { + if (edata_state_get(outer) != ecache->state) { return false; } @@ -1191,8 +1191,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((eset_state_get(&ecache->eset) != extent_state_dirty && - eset_state_get(&ecache->eset) != extent_state_muzzy) || + assert((ecache->state != extent_state_dirty && + ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &ecache->mtx); -- cgit v0.12 From ae23e5f42676bc7c851c8ea8036dfa87763be11b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 17:30:28 -0800 Subject: Unify extent_alloc_wrapper with the other wrappers. Previously, it was really more like extents_alloc (it looks in an ecache for an extent to reuse as its primary allocation pathway). Make that pathway more explciitly like extents_alloc, and rename extent_alloc_wrapper_hard accordingly. 
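
As a rough sketch of the layering this commit settles on (illustrative only: the sketch_* types and helpers below are hypothetical stand-ins rather than jemalloc APIs, and the real paths also thread tsdn/arena/ehooks state and locking through), the grow path first tries to recycle retained memory and only then falls back to a fresh OS mapping, which is all the renamed extent_alloc_wrapper does:

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

/* Hypothetical stand-in for a retained-extent cache. */
typedef struct sketch_retained_s {
	void	*slot;		/* a single cached block, for simplicity */
	size_t	slot_size;
} sketch_retained_t;

/* Recycle a previously retained block if one is big enough. */
static void *
sketch_recycle(sketch_retained_t *retained, size_t size) {
	if (retained->slot != NULL && retained->slot_size >= size) {
		void *ret = retained->slot;
		retained->slot = NULL;
		return ret;
	}
	return NULL;
}

/* Analogue of extent_alloc_wrapper after the rename: a plain new mapping. */
static void *
sketch_alloc_new(size_t size) {
	return malloc(size);	/* mmap in the real allocator */
}

/* Analogue of extents_alloc_grow: recycle first, then grow the mapping. */
static void *
sketch_alloc_grow(sketch_retained_t *retained, size_t size, bool retain_only) {
	void *ret = sketch_recycle(retained, size);
	if (ret != NULL) {
		return ret;
	}
	if (retain_only) {
		/* Mirrors the opt_retain && new_addr != NULL early return. */
		return NULL;
	}
	return sketch_alloc_new(size);
}

Callers such as arena_extent_alloc_large then go through the grow path explicitly, passing the retained ecache, as the hunks below show.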
--- include/jemalloc/internal/extent2.h | 4 +++ src/arena.c | 9 ++--- src/extent2.c | 66 +++++++++++++++++++++---------------- src/large.c | 7 ++-- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 80e789e..d74e232 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -29,10 +29,14 @@ extern rtree_t extents_rtree; edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +edata_t *extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); + edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 5407ab9..9edeaf6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -444,8 +444,9 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } size_t size = usize + sz_large_pad; if (edata == NULL) { - edata = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, - sz_large_pad, alignment, false, szind, zero, &commit); + edata = extents_alloc_grow(tsdn, arena, ehooks, + &arena->ecache_retained, NULL, usize, sz_large_pad, + alignment, false, szind, zero, &commit); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -1210,8 +1211,8 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; commit = true; - slab = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, - bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); + slab = extents_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, + NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, diff --git a/src/extent2.c b/src/extent2.c index 96c6ca6..7ecf332 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -52,6 +52,9 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); +static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); /******************************************************************************/ @@ -194,6 +197,35 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, return edata; } +edata_t * +extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, 
slab, szind, zero, commit); + if (edata == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper which does mmap that is + * very unlikely to succeed (unless it happens to be at + * the end). + */ + return NULL; + } + edata = extent_alloc_wrapper(tsdn, arena, ehooks, + new_addr, size, pad, alignment, slab, szind, zero, commit); + } + + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; +} + void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -996,10 +1028,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; } -static edata_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + size_t esize = size + pad; edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, arena->base); @@ -1027,33 +1062,6 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; } -edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); - if (edata == NULL) { - if (opt_retain && new_addr != NULL) { - /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper_hard which does mmap - * that is very unlikely to succeed (unless it happens - * to be at the end). - */ - return NULL; - } - edata = extent_alloc_wrapper_hard(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } - - assert(edata == NULL || edata_dumpable_get(edata)); - return edata; -} - static bool extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, const edata_t *outer) { diff --git a/src/large.c b/src/large.c index b8b70f4..4a3ad85 100644 --- a/src/large.c +++ b/src/large.c @@ -159,9 +159,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, new_mapping = false; } } else { - if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, - edata_past_get(edata), trailsize, 0, CACHELINE, false, - SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { + if ((trail = extents_alloc_grow(tsdn, arena, ehooks, + &arena->ecache_retained, edata_past_get(edata), trailsize, + 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) + == NULL) { return true; } if (config_stats) { -- cgit v0.12 From c792f3e4abd856933d4043a2b8f5fc2477c5d93d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 18:28:37 -0800 Subject: edata_cache: Remember the associated base_t. This will save us some trouble down the line when we stop passing arena pointers everywhere; we won't have to pass around a base_t pointer either. 
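
A minimal sketch of the pattern (hypothetical sketch_* names; the real edata_cache_t also keeps an availability tree, a counter, and a mutex): the backing allocator is recorded once at init, so the get path only needs the cache itself.

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

/* Hypothetical stand-in for base_t. */
typedef struct sketch_base_s {
	size_t	allocated;	/* bytes handed out so far */
} sketch_base_t;

typedef struct sketch_cache_s {
	void		*freelist;	/* recycled object, if any */
	sketch_base_t	*base;		/* remembered here, not passed per call */
} sketch_cache_t;

/* Returns true on error, matching the jemalloc convention. */
static bool
sketch_cache_init(sketch_cache_t *cache, sketch_base_t *base) {
	cache->freelist = NULL;
	cache->base = base;
	return false;
}

static void *
sketch_cache_get(sketch_cache_t *cache, size_t size) {
	if (cache->freelist != NULL) {
		void *ret = cache->freelist;
		cache->freelist = NULL;
		return ret;
	}
	/* Fall through to the remembered base allocator. */
	cache->base->allocated += size;
	return malloc(size);
}

Call sites shrink accordingly, e.g. from edata_cache_get(tsdn, &arena->edata_cache, arena->base) to edata_cache_get(tsdn, &arena->edata_cache), as in the hunks below.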
--- include/jemalloc/internal/edata_cache.h | 7 ++++--- src/arena.c | 2 +- src/edata_cache.c | 16 +++++++++++----- src/extent2.c | 9 +++------ src/extent_dss.c | 2 +- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index fc18408..9cb0d1c 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -12,12 +12,13 @@ struct edata_cache_s { edata_tree_t avail; atomic_zu_t count; malloc_mutex_t mtx; + base_t *base; }; -bool edata_cache_init(edata_cache_t *edata_cache); -edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, - base_t *base); +bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); +edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); + void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); diff --git a/src/arena.c b/src/arena.c index 9edeaf6..b5c8606 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2052,7 +2052,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (edata_cache_init(&arena->edata_cache)) { + if (edata_cache_init(&arena->edata_cache, base)) { goto label_error; } diff --git a/src/edata_cache.c b/src/edata_cache.c index 4d02602..1af7b96 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -2,23 +2,29 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -edata_cache_init(edata_cache_t *edata_cache) { +edata_cache_init(edata_cache_t *edata_cache, base_t *base) { + edata_avail_new(&edata_cache->avail); + /* + * This is not strictly necessary, since the edata_cache_t is only + * created inside an arena, which is zeroed on creation. But this is + * handy as a safety measure. 
+ */ + atomic_store_zu(&edata_cache->count, 0, ATOMIC_RELAXED); if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { return true; } - - edata_avail_new(&edata_cache->avail); + edata_cache->base = base; return false; } edata_t * -edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, base_t *base) { +edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_t *edata = edata_avail_first(&edata_cache->avail); if (edata == NULL) { malloc_mutex_unlock(tsdn, &edata_cache->mtx); - return base_alloc_edata(tsdn, base); + return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); diff --git a/src/extent2.c b/src/extent2.c index 7ecf332..ff98aa5 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -869,8 +869,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { goto label_err; } @@ -1036,8 +1035,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); size_t esize = size + pad; - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { return NULL; } @@ -1430,8 +1428,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache); if (trail == NULL) { goto label_error_a; } diff --git a/src/extent_dss.c b/src/extent_dss.c index 25ba944..9cf098e 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = edata_cache_get(tsdn, &arena->edata_cache, arena->base); + gap = edata_cache_get(tsdn, &arena->edata_cache); if (gap == NULL) { return NULL; } -- cgit v0.12 From 57fe99d4be118a1f34b45013be962f31f7786703 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:09:57 -0800 Subject: Move relevant index into the ehooks_t itself. It's always passed into the ehooks; keeping it colocated lets us avoid passing the arena everywhere. --- include/jemalloc/internal/base_inlines.h | 2 +- include/jemalloc/internal/base_structs.h | 3 --- include/jemalloc/internal/ehooks.h | 13 ++++++++++++- src/base.c | 7 +++---- src/ehooks.c | 4 +++- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index aec0e2e..221fca8 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -3,7 +3,7 @@ static inline unsigned base_ind_get(const base_t *base) { - return base->ind; + return ehooks_ind_get(&base->ehooks); } static inline bool diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index fb7e68a..ff1fdfb 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -20,9 +20,6 @@ struct base_block_s { }; struct base_s { - /* Associated arena's index within the arenas array. 
*/ - unsigned ind; - /* * User-configurable extent hook functions. */ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 6f4f950..23ab29c 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -20,6 +20,12 @@ extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; struct ehooks_s { + /* + * The user-visible id that goes with the ehooks (i.e. that of the base + * they're a part of, the associated arena's index within the arenas + * array). + */ + unsigned ind; /* Logically an extent_hooks_t *. */ atomic_p_t ptr; }; @@ -80,7 +86,12 @@ ehooks_post_reentrancy(tsdn_t *tsdn) { } /* Beginning of the public API. */ -void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind); + +static inline unsigned +ehooks_ind_get(const ehooks_t *ehooks) { + return ehooks->ind; +} static inline void ehooks_set_extent_hooks_ptr(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { diff --git a/src/base.c b/src/base.c index 76d7655..ad3fe83 100644 --- a/src/base.c +++ b/src/base.c @@ -346,7 +346,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. */ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, extent_hooks); + ehooks_init(&fake_ehooks, extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -359,8 +359,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); - base->ind = ind; - ehooks_init(&base->ehooks, extent_hooks); + ehooks_init(&base->ehooks, extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); @@ -411,7 +410,7 @@ extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = ehooks_get_extent_hooks_ptr(&base->ehooks); - ehooks_init(&base->ehooks, extent_hooks); + ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks)); return old_extent_hooks; } diff --git a/src/ehooks.c b/src/ehooks.c index 51b1514..2fb2c4c 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -4,7 +4,9 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_mmap.h" -void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { +void +ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { + ehooks->ind = ind; ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } -- cgit v0.12 From 9cad5639ff7bca9f33b161363252ae868cec1d34 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:44:03 -0800 Subject: Ehooks: remove arena_ind parameter. This lives within the ehooks_t now, so that callers don't need to know it. 
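To make the call-site effect concrete, here is a minimal, self-contained C sketch of the same pattern; it is illustrative only, and the names hooks_t, hooks_dalloc, and user_dalloc are invented for the example rather than taken from jemalloc. The wrapper object stores the owner index once, internal callers stop passing it, and user-visible callbacks still receive it with their documented signature.

    /*
     * Hypothetical model (not jemalloc's API): the wrapper carries the owner
     * index, so callers of the wrapper drop the extra parameter while the
     * user-supplied callback keeps receiving it.
     */
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef bool (dalloc_fn_t)(void *addr, size_t size, bool committed,
        unsigned ind);

    typedef struct hooks_s {
            unsigned ind;           /* index of the owning arena/base */
            dalloc_fn_t *dalloc;    /* user-supplied callback */
    } hooks_t;

    static bool
    user_dalloc(void *addr, size_t size, bool committed, unsigned ind) {
            printf("dalloc %zu bytes at %p (committed=%d) for index %u\n",
                size, addr, (int)committed, ind);
            return false;           /* false == success, as with extent hooks */
    }

    /* Post-refactor shape: no "unsigned arena_ind" parameter on the wrapper. */
    static bool
    hooks_dalloc(hooks_t *hooks, void *addr, size_t size, bool committed) {
            /* The callback still gets the index, recovered from the wrapper. */
            return hooks->dalloc(addr, size, committed, hooks->ind);
    }

    int
    main(void) {
            hooks_t hooks = {.ind = 3, .dalloc = user_dalloc};
            static char region[4096];
            /* Call sites no longer need to know which index the hooks serve. */
            return hooks_dalloc(&hooks, region, sizeof(region), true) ? 1 : 0;
    }

The trade-off mirrors the one in this patch: the index is duplicated into the hooks struct, in exchange for shorter signatures on every wrapper and every caller.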
--- include/jemalloc/internal/ehooks.h | 42 ++++++++++++++++++-------------------- src/base.c | 12 +++++------ src/extent2.c | 35 +++++++++++++------------------ 3 files changed, 39 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 23ab29c..4d183e0 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -172,17 +172,17 @@ ehooks_debug_zero_check(void *addr, size_t size) { static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + size_t alignment, bool *zero, bool *commit) { bool orig_zero = *zero; void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ret = ehooks_default_alloc_impl(tsdn, new_addr, size, - alignment, zero, commit, arena_ind); + alignment, zero, commit, ehooks_ind_get(ehooks)); } else { ehooks_pre_reentrancy(tsdn); ret = extent_hooks->alloc(extent_hooks, new_addr, size, - alignment, zero, commit, arena_ind); + alignment, zero, commit, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } assert(new_addr == NULL || ret == NULL || new_addr == ret); @@ -195,7 +195,7 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, static inline bool ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { + bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); @@ -204,7 +204,7 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->dalloc(extent_hooks, addr, size, - committed, arena_ind); + committed, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -212,7 +212,7 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline void ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { + bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_destroy_impl(addr, size); @@ -221,14 +221,14 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); extent_hooks->destroy(extent_hooks, addr, size, committed, - arena_ind); + ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } } static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); bool err; if (extent_hooks == &ehooks_default_extent_hooks) { @@ -238,7 +238,7 @@ ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); err = extent_hooks->commit(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } if (!err) { @@ -249,7 +249,7 @@ ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t 
*extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_decommit_impl(addr, offset, length); @@ -258,7 +258,7 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->decommit(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -266,7 +266,7 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_LAZY if (extent_hooks == &ehooks_default_extent_hooks) { @@ -278,7 +278,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->purge_lazy(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -286,7 +286,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); /* * It would be correct to have a ehooks_debug_zero_check call at the end @@ -306,7 +306,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->purge_forced(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -314,7 +314,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + size_t size_a, size_t size_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (ehooks_are_default(ehooks)) { return ehooks_default_split_impl(); @@ -323,7 +323,7 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->split(extent_hooks, addr, size, size_a, - size_b, committed, arena_ind); + size_b, committed, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -331,8 +331,7 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed, - unsigned arena_ind) { + bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, @@ -342,15 +341,14 @@ ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->merge(extent_hooks, addr_a, size_a, - addr_b, size_b, committed, arena_ind); + addr_b, size_b, committed, ehooks_ind_get(ehooks)); 
ehooks_post_reentrancy(tsdn); return err; } } static inline void -ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - unsigned arena_ind) { +ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ehooks_default_zero_impl(addr, size); diff --git a/src/base.c b/src/base.c index ad3fe83..005b0c5 100644 --- a/src/base.c +++ b/src/base.c @@ -44,7 +44,7 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { } } else { addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, - &commit, ind); + &commit); } return addr; @@ -79,18 +79,16 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. */ not_reached(); } else { - if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { + if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) { goto label_done; } - if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { + if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } - if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } - if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } /* Nothing worked. That's the application's problem. */ diff --git a/src/extent2.c b/src/extent2.c index ff98aa5..13b2920 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -827,8 +827,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *addr = edata_base_get(edata); if (!edata_zeroed_get(edata)) { size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size, - arena_ind_get(arena)); + ehooks_zero(tsdn, ehooks, addr, size); } } return edata; @@ -877,7 +876,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool committed = false; void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); + &committed); edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, @@ -989,7 +988,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); + ehooks_zero(tsdn, ehooks, addr, size); } return edata; @@ -1041,7 +1040,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, - zero, commit, arena_ind_get(arena)); + zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; @@ -1265,8 +1264,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Try to deallocate. 
*/ err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata), - arena_ind_get(arena)); + edata_size_get(edata), edata_committed_get(edata)); if (!err) { edata_cache_put(tsdn, &arena->edata_cache, edata); @@ -1303,13 +1301,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata), - arena_ind_get(arena))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = true; } else if (edata_state_get(edata) == extent_state_muzzy || !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata), - arena_ind_get(arena))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = false; } else { zeroed = false; @@ -1339,8 +1335,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Try to destroy; silently fail otherwise. */ ehooks_destroy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata), - arena_ind_get(arena)); + edata_size_get(edata), edata_committed_get(edata)); edata_cache_put(tsdn, &arena->edata_cache, edata); } @@ -1351,7 +1346,7 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) || !err); return err; } @@ -1370,7 +1365,7 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) && err); return err; } @@ -1381,7 +1376,7 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); return err; } @@ -1398,7 +1393,7 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); return err; } @@ -1467,8 +1462,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_lock_edata2(tsdn, edata, trail); bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), - size_a + size_b, size_a, size_b, edata_committed_get(edata), - arena_ind_get(arena)); + size_a + size_b, size_a, size_b, edata_committed_get(edata)); if (err) { goto label_error_c; @@ -1510,8 +1504,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), - edata_size_get(b), edata_is_head_get(b), edata_committed_get(a), - arena_ind_get(arena)); + edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); if (err) { return true; -- cgit v0.12 From 439219be7e350113771a27c6fb19ce77f5d26e03 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:52:51 -0800 Subject: Remove extent_can_coalesce arena dependency. --- src/extent2.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 13b2920..0c816bc 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1060,10 +1060,9 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, +extent_can_coalesce(ecache_t *ecache, const edata_t *inner, const edata_t *outer) { - assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); - if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { + if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { return false; } @@ -1083,7 +1082,7 @@ static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, ecache, inner, outer)); + assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, arena, ecache, outer); @@ -1125,7 +1124,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * like-state extents, so call extent_can_coalesce() * before releasing next's pool lock. */ - bool can_coalesce = extent_can_coalesce(arena, ecache, + bool can_coalesce = extent_can_coalesce(ecache, edata, next); extent_unlock_edata(tsdn, next); @@ -1146,8 +1145,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, ecache, - edata, prev); + bool can_coalesce = extent_can_coalesce(ecache, edata, + prev); extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, -- cgit v0.12 From 372042a082347dd4c036f5cfeff3853d5eac4b91 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 11:16:58 -0800 Subject: Remove merge dependence on the arena. 
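As a rough, non-authoritative illustration of the dependency-narrowing idea (invented names, not jemalloc's API): merge() receives only the hooks and the metadata cache it actually touches, and the ownership invariant that used to be implied by passing the arena is asserted explicitly against those narrower inputs, much like the asserts added in extent_merge_impl below.

    /*
     * Self-contained toy: pass the two members a function uses instead of the
     * whole aggregate, and state the former implicit invariant as asserts.
     */
    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct hooks_s { unsigned ind; } hooks_t;
    typedef struct cache_s { int nfree; } cache_t;
    typedef struct extent_s { unsigned owner_ind; char *base; size_t size; } extent_t;

    typedef struct owner_s {        /* stands in for the wide arena_t aggregate */
            hooks_t hooks;
            cache_t cache;
            /* ... many fields merge() never looks at ... */
    } owner_t;

    static bool
    merge(hooks_t *hooks, cache_t *cache, extent_t *a, extent_t *b) {
            /* The invariant once implied by "same arena" is checked directly. */
            assert(a->owner_ind == b->owner_ind);
            assert(a->owner_ind == hooks->ind);
            a->size += b->size;     /* coalesce b into a */
            cache->nfree++;         /* b's metadata would go back to the cache */
            return false;           /* false == success */
    }

    int
    main(void) {
            owner_t o = {.hooks = {.ind = 1}, .cache = {.nfree = 0}};
            static char buf[2 * 4096];
            extent_t a = {.owner_ind = 1, .base = buf, .size = 4096};
            extent_t b = {.owner_ind = 1, .base = buf + 4096, .size = 4096};
            /* The call site names the narrow dependencies instead of passing &o. */
            return merge(&o.hooks, &o.cache, &a, &b) ? 1 : 0;
    }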
--- include/jemalloc/internal/extent2.h | 4 ++-- src/extent2.c | 24 +++++++++++------------- src/large.c | 3 ++- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index d74e232..d685455 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -56,8 +56,8 @@ bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *a, edata_t *b); +bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/extent2.c b/src/extent2.c index 0c816bc..1b70f20 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -30,8 +30,8 @@ static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -1087,7 +1087,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_activate_locked(tsdn, arena, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, arena, ehooks, + bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1495,12 +1495,15 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, - edata_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, + edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); + assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); + assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); + bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); @@ -1546,20 +1549,15 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, extent_unlock_edata2(tsdn, a, b); - /* - * If we got here, we merged the extents; so they must be from the same - * arena (i.e. this one). 
- */ - assert(edata_arena_ind_get(b) == arena_ind_get(arena)); - edata_cache_put(tsdn, &arena->edata_cache, b); + edata_cache_put(tsdn, edata_cache, b); return false; } bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, arena, ehooks, a, b, false); + return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); } bool diff --git a/src/large.c b/src/large.c index 4a3ad85..4af586d 100644 --- a/src/large.c +++ b/src/large.c @@ -170,7 +170,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, ehooks, edata, trail)) { + if (extent_merge_wrapper(tsdn, ehooks, &arena->edata_cache, edata, + trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } -- cgit v0.12 From 576d7047ab93baf37d851136f6ccd4fb38810ded Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 11:33:03 -0800 Subject: Ecache: Should know its arena_ind. What we call an arena_ind is really the index associated with some particular set of ehooks; the arena is just the user-visible portion of that. Making this explicit, and reframing checks in terms of that, makes the code simpler and cleaner, and helps us avoid passing the arena itself all throughout extent code. This lets us put back an arena-specific assert. --- include/jemalloc/internal/ecache.h | 9 ++++++++- src/arena.c | 8 +++++--- src/ecache.c | 3 ++- src/extent2.c | 2 ++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 8532192..a11418c 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -10,6 +10,8 @@ struct ecache_s { eset_t eset; /* All stored extents must be in the same state. */ extent_state_t state; + /* The index of the ehooks the ecache is associated with. */ + unsigned ind; /* * If true, delay coalescing until eviction; otherwise coalesce during * deallocation. @@ -52,8 +54,13 @@ ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { return eset_nbytes_get(&ecache->eset, ind); } +static inline unsigned +ecache_ind_get(ecache_t *ecache) { + return ecache->ind; +} + bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, - bool delay_coalesce); + unsigned ind, bool delay_coalesce); void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); diff --git a/src/arena.c b/src/arena.c index b5c8606..e795acf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2018,14 +2018,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, true)) { + if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, ind, + true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, false)) { + if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, ind, + false)) { goto label_error; } /* @@ -2035,7 +2037,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * in the critical path. 
*/ if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, - false)) { + ind, false)) { goto label_error; } diff --git a/src/ecache.c b/src/ecache.c index a57a0a6..301b7ca 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -2,13 +2,14 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, +ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, malloc_mutex_rank_exclusive)) { return true; } ecache->state = state; + ecache->ind = ind; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); return false; diff --git a/src/extent2.c b/src/extent2.c index 1b70f20..1dbccf6 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1062,6 +1062,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static bool extent_can_coalesce(ecache_t *ecache, const edata_t *inner, const edata_t *outer) { + assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); + if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { return false; } -- cgit v0.12 From 282a382326fc4271f77df207074d73016fe8dcb0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:34:35 -0800 Subject: Extent: Break [de]activation's arena dependence. --- src/extent2.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 1dbccf6..30f0f02 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -330,9 +330,8 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { - assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); +extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); edata_state_set(edata, ecache->state); @@ -340,17 +339,15 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { +extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, arena, ecache, edata); + extent_deactivate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { - assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); +extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); @@ -603,7 +600,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - extent_activate_locked(tsdn, arena, ecache, edata); + extent_activate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; @@ -733,16 +730,16 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * leaking the extent. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, ecache, to_leak); + extent_deactivate(tsdn, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, ecache, lead); + extent_deactivate(tsdn, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, ecache, trail); + extent_deactivate(tsdn, ecache, trail); } return edata; } else { @@ -1086,7 +1083,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); - extent_activate_locked(tsdn, arena, ecache, outer); + extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, @@ -1094,7 +1091,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, ecache, outer); + extent_deactivate_locked(tsdn, ecache, outer); } return err; @@ -1232,7 +1229,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, return; } } - extent_deactivate_locked(tsdn, arena, ecache, edata); + extent_deactivate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } -- cgit v0.12 From 48ec5d4355c66c20d9143214c83823875ea91579 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:37:00 -0800 Subject: Break extent_coalesce arena dependence --- src/extent2.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 30f0f02..6539146 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1078,7 +1078,7 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); @@ -1086,7 +1086,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, + bool err = extent_merge_impl(tsdn, ehooks, edata_cache, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1128,9 +1128,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, next); - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, ecache, edata, next, true, - growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, + &arena->edata_cache, ehooks, ecache, edata, next, + true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -1148,9 +1148,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, prev); extent_unlock_edata(tsdn, prev); - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, ecache, edata, prev, false, - growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, + &arena->edata_cache, ehooks, ecache, edata, prev, + false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ -- cgit v0.12 From 0aa9769fb0cc73e1df6c728af10b45dfb4d1bc71 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:46:25 -0800 Subject: Break commit functions' arena dependence --- include/jemalloc/internal/extent2.h | 8 ++-- src/extent2.c | 84 ++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index d685455..eda31cd 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -45,10 +45,10 @@ void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/extent2.c b/src/extent2.c index 6539146..c1dfa99 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -18,8 +18,8 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained); +static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained); static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -47,7 +47,7 @@ static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, @@ -167,12 +167,13 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, + edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, &coalesced, false); + edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -271,8 +272,8 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, 
ehooks_t *ehooks, ecache_t *ecache, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, + ehooks, rtree_ctx, ecache, edata)) { break; } /* @@ -796,7 +797,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { extent_record(tsdn, arena, ehooks, ecache, edata, growing_retained); @@ -937,7 +938,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, true); @@ -1098,9 +1099,9 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained, bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -1128,9 +1129,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, next); - if (can_coalesce && !extent_coalesce(tsdn, - &arena->edata_cache, ehooks, ecache, edata, next, - true, growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, next, true, + growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -1148,9 +1149,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, prev); extent_unlock_edata(tsdn, prev); - if (can_coalesce && !extent_coalesce(tsdn, - &arena->edata_cache, ehooks, ecache, edata, prev, - false, growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, prev, false, + growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -1169,19 +1170,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, coalesced, growing_retained, false); + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, true); } /* @@ -1210,17 +1211,17 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, (uintptr_t)edata_base_get(edata), true) == edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, + rtree_ctx, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, ecache, edata, &coalesced, - growing_retained); + edata = extent_try_coalesce_large(tsdn, + &arena->edata_cache, ehooks, rtree_ctx, ecache, + edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. */ @@ -1295,7 +1296,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool zeroed; if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, edata, 0, + } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), @@ -1339,8 +1340,8 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), @@ -1350,16 +1351,15 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, - size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, edata, offset, length, +extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_commit_impl(tsdn, ehooks, edata, offset, length, false); } bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { +extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), -- cgit v0.12 From 56cc56b69214bf3dbcd64ad83aa63fe22be20d62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:52:34 -0800 Subject: Break extent split dependence on arena. --- include/jemalloc/internal/extent2.h | 6 +++--- src/extent2.c | 29 +++++++++++++++-------------- src/large.c | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index eda31cd..9e1f0d6 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -53,9 +53,9 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b); +edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b); diff --git a/src/extent2.c b/src/extent2.c index c1dfa99..e4218c5 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -26,7 +26,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); @@ -659,9 +659,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, arena, ehooks, *lead, - leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, - growing_retained); + *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, + slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -671,8 +671,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *edata, esize, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); + *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *edata, esize, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -1410,7 +1411,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); @@ -1421,12 +1422,12 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *trail = edata_cache_get(tsdn, edata_cache); if (trail == NULL) { goto label_error_a; } - edata_init(trail, arena_ind_get(arena), + edata_init(trail, ehooks_ind_get(ehooks), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), @@ -1438,7 +1439,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, { edata_t lead; - edata_init(&lead, arena_ind_get(arena), + edata_init(&lead, ehooks_ind_get(ehooks), edata_addr_get(edata), size_a, slab_a, szind_a, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), @@ -1480,17 +1481,17 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, label_error_c: extent_unlock_edata2(tsdn, edata, trail); label_error_b: - edata_cache_put(tsdn, &arena->edata_cache, trail); + edata_cache_put(tsdn, edata_cache, trail); label_error_a: return NULL; } edata_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, edata, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); + return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, + szind_a, slab_a, size_b, szind_b, slab_b, false); } static bool diff --git a/src/large.c b/src/large.c index 4af586d..f91fb74 100644 --- a/src/large.c +++ b/src/large.c @@ -104,7 +104,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { /* Split excess pages. */ if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, arena, + edata_t *trail = extent_split_wrapper(tsdn, &arena->edata_cache, ehooks, edata, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { -- cgit v0.12 From 2f4fa80414fc9e7374f0b784e0f925aa31d0e599 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Dec 2019 11:01:34 -0800 Subject: Rename extents -> ecache. 
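For readers scanning the hunks below, the renaming maps extents_alloc -> ecache_alloc, extents_alloc_grow -> ecache_alloc_grow, extents_dalloc -> ecache_dalloc, and extents_evict -> ecache_evict. The signatures are unchanged; only the prefix moves so that the names match the ecache_t these functions operate on.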
--- include/jemalloc/internal/extent2.h | 8 ++++---- src/arena.c | 23 +++++++++++------------ src/extent2.c | 8 ++++---- src/large.c | 6 +++--- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 9e1f0d6..fff69bb 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,15 +26,15 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -edata_t *extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index e795acf..7e1a673 100644 --- a/src/arena.c +++ b/src/arena.c @@ -258,7 +258,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); + ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,17 +434,16 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - edata_t *edata = extents_alloc(tsdn, arena, ehooks, - &arena->ecache_dirty, NULL, usize, sz_large_pad, alignment, false, - szind, zero, &commit); + edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, + NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, + edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; if (edata == NULL) { - edata = extents_alloc_grow(tsdn, arena, ehooks, + edata = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { @@ -828,7 +827,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = extents_evict(tsdn, arena, ehooks, ecache, npages_limit)) + (edata = ecache_evict(tsdn, arena, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += edata_size_get(edata) >> LG_PAGE; @@ -865,7 +864,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, 
edata_size_get(edata))) { - extents_dalloc(tsdn, arena, ehooks, + ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -1158,7 +1157,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; - while ((edata = extents_evict(tsdn, arena, ehooks, + while ((edata = ecache_evict(tsdn, arena, ehooks, &arena->ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, ehooks, edata); } @@ -1211,7 +1210,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; commit = true; - slab = extents_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, + slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, + edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, + slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } diff --git a/src/extent2.c b/src/extent2.c index e4218c5..8d78f95 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -184,7 +184,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, } edata_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -199,7 +199,7 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -228,7 +228,7 @@ extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -243,7 +243,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); diff --git a/src/large.c b/src/large.c index f91fb74..5ca09f6 100644 --- a/src/large.c +++ b/src/large.c @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool commit = true; edata_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, ehooks, 
&arena->ecache_dirty, + if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, + || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = extents_alloc_grow(tsdn, arena, ehooks, + if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { -- cgit v0.12 From e210ccc57ed165cc4308a09a9637f5d6e49b0dbd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Dec 2019 11:05:07 -0800 Subject: Move extent2 -> extent. Eventually, we may fully break off the extent module; but not for some time. If it's going to live on in a non-transitory state, it might as well have the nicer name. --- Makefile.in | 2 +- include/jemalloc/internal/extent.h | 59 + include/jemalloc/internal/extent2.h | 64 - .../internal/jemalloc_internal_inlines_b.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 +- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 2 +- src/extent.c | 1579 ++++++++++++++++++++ src/extent2.c | 1579 -------------------- 8 files changed, 1642 insertions(+), 1647 deletions(-) create mode 100644 include/jemalloc/internal/extent.h delete mode 100644 include/jemalloc/internal/extent2.h create mode 100644 src/extent.c delete mode 100644 src/extent2.c diff --git a/Makefile.in b/Makefile.in index 7145848..40ba7f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -109,7 +109,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ - $(srcroot)src/extent2.c \ + $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h new file mode 100644 index 0000000..8fecee6 --- /dev/null +++ b/include/jemalloc/internal/extent.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_H +#define JEMALLOC_INTERNAL_EXTENT_H + +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" + +/* + * This module contains the page-level allocator. It chooses the addresses that + * allocations requested by other modules will inhabit, and updates the global + * metadata to reflect allocation/deallocation/purging decisions. + */ + +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. 
+ */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; + +edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); +edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); +void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata); +edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, size_t npages_min); + +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata); +bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); +edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b); + +bool extent_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h deleted file mode 100644 index fff69bb..0000000 --- a/include/jemalloc/internal/extent2.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT2_H -#define JEMALLOC_INTERNAL_EXTENT2_H - -#include "jemalloc/internal/ecache.h" -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" - -/* - * This module contains the page-level allocator. It chooses the addresses that - * allocations requested by other modules will inhabit, and updates the global - * metadata to reflect allocation/deallocation/purging decisions. - * - * The naming ("extent2" for the module, and "extent_" or "extents_" for most of - * the functions) is historical. Eventually, the naming should be updated to - * reflect the functionality. Similarly, the utilization stats live here for no - * particular reason. This will also be changed, but much more immediately. - */ - -/* - * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) - * is the max ratio between the size of the active extent and the new extent. 
- */ -#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 -extern size_t opt_lg_extent_max_active_fit; - -extern rtree_t extents_rtree; - -edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); -edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); -void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata); -edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, size_t npages_min); - -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata); -bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b); - -bool extent_boot(void); - -#endif /* JEMALLOC_INTERNAL_EXTENT2_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 8367ee2..ebfb331 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H -#include "jemalloc/internal/extent2.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. 
*/ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 7b2e84a..5879090 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -49,7 +49,7 @@ - + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 338962b..631de57 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -49,7 +49,7 @@ - + diff --git a/src/extent.c b/src/extent.c new file mode 100644 index 0000000..8d78f95 --- /dev/null +++ b/src/extent.c @@ -0,0 +1,1579 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" + +/******************************************************************************/ +/* Data. */ + +rtree_t extents_rtree; +/* Keyed by the address of the edata_t being protected. */ +mutex_pool_t extent_mutex_pool; + +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + +static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained); +static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, + bool growing_retained); +static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, + bool growing_retained); +static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); + +/* Used exclusively for gdump triggering. */ +static atomic_zu_t curpages; +static atomic_zu_t highpages; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ + +static void extent_deregister(tsdn_t *tsdn, edata_t *edata); +static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); +static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained); +static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); + +/******************************************************************************/ + +typedef enum { + lock_result_success, + lock_result_failure, + lock_result_no_extent +} lock_result_t; + +static inline void +extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); +} + +static inline void +extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); +} + +static inline void +extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +static inline void +extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +static lock_result_t +extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, + edata_t **result, bool inactive_only) { + edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, + elm, true); + + /* Slab implies active extents and should be skipped. */ + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &extents_rtree, elm, true))) { + return lock_result_no_extent; + } + + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->edata mapping. We have to recheck while holding the lock. + */ + extent_lock_edata(tsdn, edata1); + edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, + true); + + if (edata1 == edata2) { + *result = edata1; + return lock_result_success; + } else { + extent_unlock_edata(tsdn, edata1); + return lock_result_failure; + } +} + +/* + * Returns a pool-locked edata_t * if there's one associated with the given + * address, and NULL otherwise. 
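A minimal usage sketch of the helper described above, assuming the jemalloc-internal types and the variables tsdn, rtree_ctx, and addr are in scope; it mirrors how extent_recycle_extract() and extent_try_coalesce_impl() later in this file consume it:

    	edata_t *edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, addr,
    	    /* inactive_only */ false);
    	if (edata != NULL) {
    		/* Inspect or update the extent while holding its pool lock. */
    		extent_unlock_edata(tsdn, edata);
    	}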
+ */ +static edata_t * +extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, + bool inactive_only) { + edata_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + lock_result_t lock_result; + do { + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, + inactive_only); + } while (lock_result == lock_result_failure); + return ret; +} + +static void +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, + size_t alignment) { + assert(edata_base_get(edata) == edata_addr_get(edata)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + edata->e_addr = (void *)((uintptr_t)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == + edata->e_addr); + } +} + +static bool +extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, + edata_t *edata) { + edata_state_set(edata, extent_state_active); + bool coalesced; + edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, &coalesced, false); + edata_state_set(edata, ecache->state); + + if (!coalesced) { + return true; + } + eset_insert(&ecache->eset, edata); + return false; +} + +edata_t * +ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, + size, pad, alignment, slab, szind, zero, commit, false); + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; +} + +edata_t * +ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); + if (edata == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper which does mmap that is + * very unlikely to succeed (unless it happens to be at + * the end). 
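For context, the arena-side callers changed earlier in this series try the dirty cache first, then (if arena_may_have_muzzy()) the muzzy cache, and only then grow from retained memory. A simplified sketch follows; size and szind stand in for the bin_info->slab_size and binind used by the real slab-allocation callers, and tsdn, arena, ehooks, zero, and commit are assumed to be in scope.

    	edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty,
    	    NULL, size, 0, PAGE, true, szind, &zero, &commit);
    	if (slab == NULL) {
    		slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy,
    		    NULL, size, 0, PAGE, true, szind, &zero, &commit);
    	}
    	if (slab == NULL) {
    		slab = ecache_alloc_grow(tsdn, arena, ehooks,
    		    &arena->ecache_retained, NULL, size, 0, PAGE, true, szind,
    		    &zero, &commit);
    	}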
+ */ + return NULL; + } + edata = extent_alloc_wrapper(tsdn, arena, ehooks, + new_addr, size, pad, alignment, slab, szind, zero, commit); + } + + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; +} + +void +ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); + assert(edata_dumpable_get(edata)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + extent_record(tsdn, arena, ehooks, ecache, edata, false); +} + +edata_t * +ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, + size_t npages_min) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + malloc_mutex_lock(tsdn, &ecache->mtx); + + /* + * Get the LRU coalesced extent, if any. If coalescing was delayed, + * the loop will iterate until the LRU extent is fully coalesced. + */ + edata_t *edata; + while (true) { + /* Get the LRU extent, if any. */ + edata = edata_list_first(&ecache->eset.lru); + if (edata == NULL) { + goto label_return; + } + /* Check the eviction limit. */ + size_t extents_npages = ecache_npages_get(ecache); + if (extents_npages <= npages_min) { + edata = NULL; + goto label_return; + } + eset_remove(&ecache->eset, edata); + if (!ecache->delay_coalesce) { + break; + } + /* Try to coalesce. */ + if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, + ehooks, rtree_ctx, ecache, edata)) { + break; + } + /* + * The LRU extent was just coalesced and the result placed in + * the LRU at its neighbor's position. Start over. + */ + } + + /* + * Either mark the extent active or deregister it to protect against + * concurrent operations. + */ + switch (ecache->state) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + case extent_state_muzzy: + edata_state_set(edata, extent_state_active); + break; + case extent_state_retained: + extent_deregister(tsdn, edata); + break; + default: + not_reached(); + } + +label_return: + malloc_mutex_unlock(tsdn, &ecache->mtx); + return edata; +} + +/* + * This can only happen when we fail to allocate a new extent struct (which + * indicates OOM), e.g. when trying to split an existing extent. + */ +static void +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained) { + size_t sz = edata_size_get(edata); + if (config_stats) { + arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + } + /* + * Leak extent after making sure its pages have already been purged, so + * that this is only a virtual memory leak. 
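For reference, ecache_evict() above is driven by eviction loops such as the one in the arena_destroy_retained() hunk earlier in this series; a condensed sketch (tsdn, arena, and ehooks assumed in scope):

    	edata_t *edata;
    	while ((edata = ecache_evict(tsdn, arena, ehooks,
    	    &arena->ecache_retained, 0)) != NULL) {
    		extent_destroy_wrapper(tsdn, arena, ehooks, edata);
    	}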
+ */ + if (ecache->state == extent_state_dirty) { + if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, + growing_retained)) { + extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), growing_retained); + } + } + edata_cache_put(tsdn, &arena->edata_cache, edata); +} + +static void +extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); + assert(edata_state_get(edata) == extent_state_active); + + edata_state_set(edata, ecache->state); + eset_insert(&ecache->eset, edata); +} + +static void +extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + malloc_mutex_lock(tsdn, &ecache->mtx); + extent_deactivate_locked(tsdn, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); +} + +static void +extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); + assert(edata_state_get(edata) == ecache->state); + + eset_remove(&ecache->eset, edata); + edata_state_set(edata, extent_state_active); +} + +static bool +extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const edata_t *edata, bool dependent, bool init_missing, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) { + return true; + } + assert(*r_elm_a != NULL); + + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; + } + assert(*r_elm_b != NULL); + + return false; +} + +static void +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); + if (elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, + slab); + } +} + +static void +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, + szind_t szind) { + assert(edata_slab_get(edata)); + + /* Register interior. */ + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE), edata, szind, true); + } +} + +static void +extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { + cassert(config_prof); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nadd = edata_size_get(edata) >> LG_PAGE; + size_t cur = atomic_fetch_add_zu(&curpages, nadd, + ATOMIC_RELAXED) + nadd; + size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); + while (cur > high && !atomic_compare_exchange_weak_zu( + &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highpages update race. + * Note that high is updated in case of CAS failure. 
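The same high-water-mark update pattern as a standalone C11 sketch, using <stdatomic.h> rather than jemalloc's atomic_*_zu wrappers; the *_demo names are hypothetical:

    #include <stdatomic.h>
    #include <stddef.h>

    static _Atomic size_t curpages_demo;
    static _Atomic size_t highpages_demo;

    /* Record an increase and raise the high-water mark if needed. */
    static void
    record_pages(size_t nadd) {
    	size_t cur = atomic_fetch_add_explicit(&curpages_demo, nadd,
    	    memory_order_relaxed) + nadd;
    	size_t high = atomic_load_explicit(&highpages_demo,
    	    memory_order_relaxed);
    	/* On CAS failure, high is reloaded; retry only while we still win. */
    	while (cur > high && !atomic_compare_exchange_weak_explicit(
    	    &highpages_demo, &high, cur, memory_order_relaxed,
    	    memory_order_relaxed)) {
    		/* retry */
    	}
    }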
+ */ + } + if (cur > high && prof_gdump_get_unlocked()) { + prof_gdump(tsdn); + } + } +} + +static void +extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { + cassert(config_prof); + + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nsub = edata_size_get(edata) >> LG_PAGE; + assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); + atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); + } +} + +static bool +extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + + /* + * We need to hold the lock to protect against a concurrent coalesce + * operation that sees us in a partial state. + */ + extent_lock_edata(tsdn, edata); + + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, + &elm_a, &elm_b)) { + extent_unlock_edata(tsdn, edata); + return true; + } + + szind_t szind = edata_szind_get_maybe_invalid(edata); + bool slab = edata_slab_get(edata); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); + if (slab) { + extent_interior_register(tsdn, rtree_ctx, edata, szind); + } + + extent_unlock_edata(tsdn, edata); + + if (config_prof && gdump_add) { + extent_gdump_add(tsdn, edata); + } + + return false; +} + +static bool +extent_register(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, true); +} + +static bool +extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, false); +} + +static void +extent_reregister(tsdn_t *tsdn, edata_t *edata) { + bool err = extent_register(tsdn, edata); + assert(!err); +} + +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ +static void +extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + edata_t *edata) { + size_t i; + + assert(edata_slab_get(edata)); + + for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE)); + } +} + +/* + * Removes all pointers to the given extent from the global rtree. + */ +static void +extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, + &elm_a, &elm_b); + + extent_lock_edata(tsdn, edata); + + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, edata); + edata_slab_set(edata, false); + } + + extent_unlock_edata(tsdn, edata); + + if (config_prof && gdump) { + extent_gdump_sub(tsdn, edata); + } +} + +static void +extent_deregister(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, false); +} + +/* + * Tries to find and remove an extent from ecache that can be used for the + * given allocation request. 
+ */ +static edata_t * +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(alignment > 0); + if (config_debug && new_addr != NULL) { + /* + * Non-NULL new_addr has two use cases: + * + * 1) Recycle a known-extant extent, e.g. during purging. + * 2) Perform in-place expanding reallocation. + * + * Regardless of use case, new_addr must either refer to a + * non-existing extent, or to the base of an extant extent, + * since only active slabs support interior lookups (which of + * course cannot be recycled). + */ + assert(PAGE_ADDR2BASE(new_addr) == new_addr); + assert(pad == 0); + assert(alignment <= PAGE); + } + + size_t esize = size + pad; + malloc_mutex_lock(tsdn, &ecache->mtx); + edata_t *edata; + if (new_addr != NULL) { + edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, + false); + if (edata != NULL) { + /* + * We might null-out edata to report an error, but we + * still need to unlock the associated mutex after. + */ + edata_t *unlock_edata = edata; + assert(edata_base_get(edata) == new_addr); + if (edata_arena_ind_get(edata) != arena_ind_get(arena) + || edata_size_get(edata) < esize + || edata_state_get(edata) + != ecache->state) { + edata = NULL; + } + extent_unlock_edata(tsdn, unlock_edata); + } + } else { + edata = eset_fit(&ecache->eset, esize, alignment, + ecache->delay_coalesce); + } + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &ecache->mtx); + return NULL; + } + + extent_activate_locked(tsdn, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); + + return edata; +} + +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving edata pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any ecache; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, edata, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input edata_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, edata, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + edata_t **edata, edata_t **lead, edata_t **trail, + /* The mess to clean up, in case of error. 
*/ + edata_t **to_leak, edata_t **to_salvage, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool growing_retained) { + size_t esize = size + pad; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), + PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); + assert(new_addr == NULL || leadsize == 0); + if (edata_size_get(*edata) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = edata_size_get(*edata) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; + + /* Split the lead. */ + if (leadsize != 0) { + *lead = *edata; + *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, + slab, growing_retained); + if (*edata == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; + } + } + + /* Split the trail. */ + if (trailsize != 0) { + *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *edata, esize, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *edata; + *to_salvage = *lead; + *lead = NULL; + *edata = NULL; + return extent_split_interior_error; + } + } + + if (leadsize == 0 && trailsize == 0) { + /* + * Splitting causes szind to be set as a side effect, but no + * splitting occurred. + */ + edata_szind_set(*edata, szind); + if (szind != SC_NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_addr_get(*edata), szind, slab); + if (slab && edata_size_get(*edata) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)edata_past_get(*edata) - + (uintptr_t)PAGE, szind, slab); + } + } + } + + return extent_split_interior_ok; +} + +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into ecache. + */ +static edata_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, + bool growing_retained) { + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, + &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (!maps_coalesce && result != extent_split_interior_ok + && !opt_retain) { + /* + * Split isn't supported (implies Windows w/o retain). Avoid + * leaking the extent. + */ + assert(to_leak != NULL && lead == NULL && trail == NULL); + extent_deactivate(tsdn, ecache, to_leak); + return NULL; + } + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, ecache, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, ecache, trail); + } + return edata; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. 
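A standalone worked example of the lead/trail arithmetic in extent_split_interior() above, with hypothetical addresses and sizes, a 64-bit target, and pad == 0:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Round up to a power-of-two alignment, as ALIGNMENT_CEILING() does. */
    #define ALIGN_UP(x, align) (((x) + ((align) - 1)) & ~((uintptr_t)(align) - 1))

    int
    main(void) {
    	/* 64 KiB recycled extent; request 16 KiB at 32 KiB alignment. */
    	uintptr_t base = (uintptr_t)0x7f0000001000ULL;	/* hypothetical */
    	size_t extent_size = 64 * 1024;
    	size_t esize = 16 * 1024;	/* size + pad, with pad == 0 */
    	size_t alignment = 32 * 1024;

    	size_t leadsize = ALIGN_UP(base, alignment) - base;
    	assert(extent_size >= leadsize + esize);
    	size_t trailsize = extent_size - leadsize - esize;
    	/*
    	 * leadsize == 28 KiB and trailsize == 20 KiB; both pieces would be
    	 * split off and returned to the ecache.
    	 */
    	assert(leadsize == 28 * 1024 && trailsize == 20 * 1024);
    	return 0;
    }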
+ */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = edata_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, + growing_retained); + assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, + false) == NULL); + } + return NULL; + } + unreachable(); +} + +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given ecache_t. + */ +static edata_t * +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, + rtree_ctx, ecache, new_addr, size, pad, alignment, slab, + growing_retained); + if (edata == NULL) { + return NULL; + } + + edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, + new_addr, size, pad, alignment, slab, szind, edata, + growing_retained); + if (edata == NULL) { + return NULL; + } + + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, ehooks, edata, 0, + edata_size_get(edata), growing_retained)) { + extent_record(tsdn, arena, ehooks, ecache, edata, + growing_retained); + return NULL; + } + } + + if (edata_committed_get(edata)) { + *commit = true; + } + if (edata_zeroed_get(edata)) { + *zero = true; + } + + if (pad != 0) { + extent_addr_randomize(tsdn, arena, edata, alignment); + } + assert(edata_state_get(edata) == extent_state_active); + if (slab) { + edata_slab_set(edata, slab); + extent_interior_register(tsdn, rtree_ctx, edata, szind); + } + + if (*zero) { + void *addr = edata_base_get(edata); + if (!edata_zeroed_get(edata)) { + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + } + } + return edata; +} + +/* + * If virtual memory is retained, create increasingly larger extents from which + * to split requested extents in order to limit the total number of disjoint + * virtual memory ranges retained by each arena. + */ +static edata_t * +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { + malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + size_t esize = size + pad; + size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size_min < esize) { + goto label_err; + } + /* + * Find the next extent size in the series that would be large enough to + * satisfy this request. + */ + pszind_t egn_skip = 0; + size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + while (alloc_size < alloc_size_min) { + egn_skip++; + if (arena->ecache_grow.next + egn_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. 
*/ + goto label_err; + } + alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + } + + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + if (edata == NULL) { + goto label_err; + } + bool zeroed = false; + bool committed = false; + + void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed); + + edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, + committed, true, EXTENT_IS_HEAD); + if (ptr == NULL) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + goto label_err; + } + + if (extent_register_no_gdump_add(tsdn, edata)) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + goto label_err; + } + + if (edata_zeroed_get(edata) && edata_committed_get(edata)) { + *zero = true; + } + if (edata_committed_get(edata)) { + *commit = true; + } + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior(tsdn, + arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, + &to_salvage, NULL, size, pad, alignment, slab, szind, true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->ecache_retained, lead, true); + } + if (trail != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->ecache_retained, trail, true); + } + } else { + /* + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); + } + extent_record(tsdn, arena, ehooks, + &arena->ecache_retained, to_salvage, true); + } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, + &arena->ecache_retained, to_leak, true); + } + goto label_err; + } + + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, ehooks, edata, 0, + edata_size_get(edata), true)) { + extent_record(tsdn, arena, ehooks, + &arena->ecache_retained, edata, true); + goto label_err; + } + /* A successful commit should return zeroed memory. */ + if (config_debug) { + void *addr = edata_addr_get(edata); + size_t *p = (size_t *)(uintptr_t)addr; + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } + } + } + + /* + * Increment extent_grow_next if doing so wouldn't exceed the allowed + * range. + */ + if (arena->ecache_grow.next + egn_skip + 1 <= + arena->ecache_grow.limit) { + arena->ecache_grow.next += egn_skip + 1; + } else { + arena->ecache_grow.next = arena->ecache_grow.limit; + } + /* All opportunities for failure are past. */ + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + + if (config_prof) { + /* Adjust gdump stats now that extent is final size. 
*/ + extent_gdump_add(tsdn, edata); + } + if (pad != 0) { + extent_addr_randomize(tsdn, arena, edata, alignment); + } + if (slab) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + edata_slab_set(edata, true); + extent_interior_register(tsdn, rtree_ctx, edata, szind); + } + if (*zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + } + + return edata; +label_err: + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + return NULL; +} + +static edata_t * +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size != 0); + assert(alignment != 0); + + malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); + + edata_t *edata = extent_recycle(tsdn, arena, ehooks, + &arena->ecache_retained, new_addr, size, pad, alignment, slab, + szind, zero, commit, true); + if (edata != NULL) { + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + if (config_prof) { + extent_gdump_add(tsdn, edata); + } + } else if (opt_retain && new_addr == NULL) { + edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, + alignment, slab, szind, zero, commit); + /* extent_grow_retained() always releases extent_grow_mtx. */ + } else { + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + } + malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); + + return edata; +} + +edata_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + size_t esize = size + pad; + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + if (edata == NULL) { + return NULL; + } + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + zero, commit); + if (addr == NULL) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + return NULL; + } + edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true, EXTENT_NOT_HEAD); + if (pad != 0) { + extent_addr_randomize(tsdn, arena, edata, alignment); + } + if (extent_register(tsdn, edata)) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + return NULL; + } + + return edata; +} + +static bool +extent_can_coalesce(ecache_t *ecache, const edata_t *inner, + const edata_t *outer) { + assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); + + if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { + return false; + } + + assert(edata_state_get(inner) == extent_state_active); + if (edata_state_get(outer) != ecache->state) { + return false; + } + + if (edata_committed_get(inner) != edata_committed_get(outer)) { + return false; + } + + return true; +} + +static bool +extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, + ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, + bool growing_retained) { + assert(extent_can_coalesce(ecache, inner, outer)); + + extent_activate_locked(tsdn, ecache, outer); + + malloc_mutex_unlock(tsdn, &ecache->mtx); + bool err = extent_merge_impl(tsdn, ehooks, edata_cache, + forward ? inner : outer, forward ? 
outer : inner, growing_retained); + malloc_mutex_lock(tsdn, &ecache->mtx); + + if (err) { + extent_deactivate_locked(tsdn, ecache, outer); + } + + return err; +} + +static edata_t * +extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained, bool inactive_only) { + /* + * We avoid checking / locking inactive neighbors for large size + * classes, since they are eagerly coalesced on deallocation which can + * cause lock contention. + */ + /* + * Continue attempting to coalesce until failure, to protect against + * races with other threads that are thwarted by this one. + */ + bool again; + do { + again = false; + + /* Try to coalesce forward. */ + edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_past_get(edata), inactive_only); + if (next != NULL) { + /* + * ecache->mtx only protects against races for + * like-state extents, so call extent_can_coalesce() + * before releasing next's pool lock. + */ + bool can_coalesce = extent_can_coalesce(ecache, + edata, next); + + extent_unlock_edata(tsdn, next); + + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, next, true, + growing_retained)) { + if (ecache->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return edata; + } + again = true; + } + } + + /* Try to coalesce backward. */ + edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_before_get(edata), inactive_only); + if (prev != NULL) { + bool can_coalesce = extent_can_coalesce(ecache, edata, + prev); + extent_unlock_edata(tsdn, prev); + + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, prev, false, + growing_retained)) { + edata = prev; + if (ecache->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return edata; + } + again = true; + } + } + } while (again); + + if (ecache->delay_coalesce) { + *coalesced = false; + } + return edata; +} + +static edata_t * +extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, false); +} + +static edata_t * +extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, true); +} + +/* + * Does the metadata management portions of putting an unused extent into the + * given ecache_t (coalesces, deregisters slab interiors, the heap operations). 
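For orientation, extent_record() below is reached from the deallocation side via ecache_dalloc(); a condensed sketch of that path (tsdn, arena, ehooks, and edata assumed in scope, cf. the arena decay changes earlier in this series):

    	/* Arena side: return a no-longer-needed extent to the muzzy cache. */
    	ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata);
    	/*
    	 * ecache_dalloc() resets the extent's addr and zeroed flag, then
    	 * calls extent_record(tsdn, arena, ehooks, ecache, edata, false),
    	 * which coalesces with free neighbors and inserts the result into
    	 * ecache->eset.
    	 */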
+ */ +static void +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool growing_retained) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + assert((ecache->state != extent_state_dirty && + ecache->state != extent_state_muzzy) || + !edata_zeroed_get(edata)); + + malloc_mutex_lock(tsdn, &ecache->mtx); + + edata_szind_set(edata, SC_NSIZES); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, edata); + edata_slab_set(edata, false); + } + + assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), true) == edata); + + if (!ecache->delay_coalesce) { + edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, + rtree_ctx, ecache, edata, NULL, growing_retained); + } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { + assert(ecache == &arena->ecache_dirty); + /* Always coalesce large extents eagerly. */ + bool coalesced; + do { + assert(edata_state_get(edata) == extent_state_active); + edata = extent_try_coalesce_large(tsdn, + &arena->edata_cache, ehooks, rtree_ctx, ecache, + edata, &coalesced, growing_retained); + } while (coalesced); + if (edata_size_get(edata) >= oversize_threshold) { + /* Shortcut to purge the oversize extent eagerly. */ + malloc_mutex_unlock(tsdn, &ecache->mtx); + arena_decay_extent(tsdn, arena, ehooks, edata); + return; + } + } + extent_deactivate_locked(tsdn, ecache, edata); + + malloc_mutex_unlock(tsdn, &ecache->mtx); +} + +void +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { + ehooks_t *ehooks = arena_get_ehooks(arena); + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (extent_register(tsdn, edata)) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + return; + } + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); +} + +static bool +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { + bool err; + + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_addr_set(edata, edata_base_get(edata)); + + /* Try to deallocate. */ + err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata)); + + if (!err) { + edata_cache_put(tsdn, &arena->edata_cache, edata); + } + + return err; +} + +void +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { + assert(edata_dumpable_get(edata)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Avoid calling the default extent_dalloc unless have to. */ + if (!ehooks_dalloc_will_fail(ehooks)) { + /* + * Deregister first to avoid a race with other allocating + * threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, edata); + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { + return; + } + extent_reregister(tsdn, edata); + } + + /* Try to decommit; purge if that fails. 
*/ + bool zeroed; + if (!edata_committed_get(edata)) { + zeroed = true; + } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, + edata_size_get(edata))) { + zeroed = true; + } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata))) { + zeroed = true; + } else if (edata_state_get(edata) == extent_state_muzzy || + !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata))) { + zeroed = false; + } else { + zeroed = false; + } + edata_zeroed_set(edata, zeroed); + + if (config_prof) { + extent_gdump_sub(tsdn, edata); + } + + extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, + false); +} + +void +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Deregister first to avoid a race with other allocating threads. */ + extent_deregister(tsdn, edata); + + edata_addr_set(edata, edata_base_get(edata)); + + /* Try to destroy; silently fail otherwise. */ + ehooks_destroy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata)); + + edata_cache_put(tsdn, &arena->edata_cache, edata); +} + +static bool +extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length); + edata_committed_set(edata, edata_committed_get(edata) || !err); + return err; +} + +bool +extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_commit_impl(tsdn, ehooks, edata, offset, length, + false); +} + +bool +extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length); + edata_committed_set(edata, edata_committed_get(edata) && err); + return err; +} + +static bool +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length); + return err; +} + +bool +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, + length, false); +} + +static bool +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length); + return err; +} + +bool +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, edata, + offset, length, false); +} + +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). + */ +static edata_t * +extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { + assert(edata_size_get(edata) == size_a + size_b); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + + if (ehooks_split_will_fail(ehooks)) { + return NULL; + } + + edata_t *trail = edata_cache_get(tsdn, edata_cache); + if (trail == NULL) { + goto label_error_a; + } + + edata_init(trail, ehooks_ind_get(ehooks), + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_dumpable_get(edata), EXTENT_NOT_HEAD); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; + { + edata_t lead; + + edata_init(&lead, ehooks_ind_get(ehooks), + edata_addr_get(edata), size_a, + slab_a, szind_a, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), edata_dumpable_get(edata), + EXTENT_NOT_HEAD); + + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, + true, &lead_elm_a, &lead_elm_b); + } + rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, + &trail_elm_a, &trail_elm_b); + + if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL + || trail_elm_b == NULL) { + goto label_error_b; + } + + extent_lock_edata2(tsdn, edata, trail); + + bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), + size_a + size_b, size_a, size_b, edata_committed_get(edata)); + + if (err) { + goto label_error_c; + } + + edata_size_set(edata, size_a); + edata_szind_set(edata, szind_a); + + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, + szind_a, slab_a); + extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, + szind_b, slab_b); + + extent_unlock_edata2(tsdn, edata, trail); + + return trail; +label_error_c: + extent_unlock_edata2(tsdn, edata, trail); +label_error_b: + edata_cache_put(tsdn, edata_cache, trail); +label_error_a: + return NULL; +} + +edata_t * +extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, + szind_a, slab_a, size_b, szind_b, slab_b, false); +} + +static bool +extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, + edata_t *a, edata_t *b, bool growing_retained) { + 
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(edata_base_get(a) < edata_base_get(b)); + + assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); + assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); + + bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), + edata_size_get(a), edata_is_head_get(a), edata_base_get(b), + edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); + + if (err) { + return true; + } + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than extent_{,de}register() to do things in the right order. + */ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, + &a_elm_b); + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, + &b_elm_b); + + extent_lock_edata2(tsdn, a, b); + + if (a_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, + SC_NSIZES, false); + } + if (b_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, + SC_NSIZES, false); + } else { + b_elm_b = b_elm_a; + } + + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + edata_szind_set(a, SC_NSIZES); + edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? + edata_sn_get(a) : edata_sn_get(b)); + edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, + false); + + extent_unlock_edata2(tsdn, a, b); + + edata_cache_put(tsdn, edata_cache, b); + + return false; +} + +bool +extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, + edata_t *a, edata_t *b) { + return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); +} + +bool +extent_boot(void) { + if (rtree_new(&extents_rtree, true)) { + return true; + } + + if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", + WITNESS_RANK_EXTENT_POOL)) { + return true; + } + + if (have_dss) { + extent_dss_boot(); + } + + return false; +} diff --git a/src/extent2.c b/src/extent2.c deleted file mode 100644 index 8d78f95..0000000 --- a/src/extent2.c +++ /dev/null @@ -1,1579 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" - -/******************************************************************************/ -/* Data. */ - -rtree_t extents_rtree; -/* Keyed by the address of the edata_t being protected. 
*/ -mutex_pool_t extent_mutex_pool; - -size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; - -static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained); -static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); -static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); - -/* Used exclusively for gdump triggering. */ -static atomic_zu_t curpages; -static atomic_zu_t highpages; - -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ - -static void extent_deregister(tsdn_t *tsdn, edata_t *edata); -static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained); -static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); - -/******************************************************************************/ - -typedef enum { - lock_result_success, - lock_result_failure, - lock_result_no_extent -} lock_result_t; - -static inline void -extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static inline void -extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static lock_result_t -extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, - elm, true); - - /* Slab implies active extents and should be skipped. 
*/ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &extents_rtree, elm, true))) { - return lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->edata mapping. We have to recheck while holding the lock. - */ - extent_lock_edata(tsdn, edata1); - edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, - true); - - if (edata1 == edata2) { - *result = edata1; - return lock_result_success; - } else { - extent_unlock_edata(tsdn, edata1); - return lock_result_failure; - } -} - -/* - * Returns a pool-locked edata_t * if there's one associated with the given - * address, and NULL otherwise. - */ -static edata_t * -extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, - bool inactive_only) { - edata_t *ret = NULL; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - lock_result_t lock_result; - do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, - inactive_only); - } while (lock_result == lock_result_failure); - return ret; -} - -static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t alignment) { - assert(edata_base_get(edata) == edata_addr_get(edata)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - edata->e_addr = (void *)((uintptr_t)edata->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == - edata->e_addr); - } -} - -static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, - edata_t *edata) { - edata_state_set(edata, extent_state_active); - bool coalesced; - edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, &coalesced, false); - edata_state_set(edata, ecache->state); - - if (!coalesced) { - return true; - } - eset_insert(&ecache->eset, edata); - return false; -} - -edata_t * -ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size + pad != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); - assert(edata == NULL || edata_dumpable_get(edata)); - return edata; -} - -edata_t * -ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit) { - assert(size + pad != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); - if (edata == NULL) { - if (opt_retain && new_addr != NULL) { - /* - * When retain is enabled and 
new_addr is set, we do not - * attempt extent_alloc_wrapper which does mmap that is - * very unlikely to succeed (unless it happens to be at - * the end). - */ - return NULL; - } - edata = extent_alloc_wrapper(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } - - assert(edata == NULL || edata_dumpable_get(edata)); - return edata; -} - -void -ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata) { - assert(edata_base_get(edata) != NULL); - assert(edata_size_get(edata) != 0); - assert(edata_dumpable_get(edata)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_addr_set(edata, edata_base_get(edata)); - edata_zeroed_set(edata, false); - - extent_record(tsdn, arena, ehooks, ecache, edata, false); -} - -edata_t * -ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - size_t npages_min) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - malloc_mutex_lock(tsdn, &ecache->mtx); - - /* - * Get the LRU coalesced extent, if any. If coalescing was delayed, - * the loop will iterate until the LRU extent is fully coalesced. - */ - edata_t *edata; - while (true) { - /* Get the LRU extent, if any. */ - edata = edata_list_first(&ecache->eset.lru); - if (edata == NULL) { - goto label_return; - } - /* Check the eviction limit. */ - size_t extents_npages = ecache_npages_get(ecache); - if (extents_npages <= npages_min) { - edata = NULL; - goto label_return; - } - eset_remove(&ecache->eset, edata); - if (!ecache->delay_coalesce) { - break; - } - /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, - ehooks, rtree_ctx, ecache, edata)) { - break; - } - /* - * The LRU extent was just coalesced and the result placed in - * the LRU at its neighbor's position. Start over. - */ - } - - /* - * Either mark the extent active or deregister it to protect against - * concurrent operations. - */ - switch (ecache->state) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - case extent_state_muzzy: - edata_state_set(edata, extent_state_active); - break; - case extent_state_retained: - extent_deregister(tsdn, edata); - break; - default: - not_reached(); - } - -label_return: - malloc_mutex_unlock(tsdn, &ecache->mtx); - return edata; -} - -/* - * This can only happen when we fail to allocate a new extent struct (which - * indicates OOM), e.g. when trying to split an existing extent. - */ -static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained) { - size_t sz = edata_size_get(edata); - if (config_stats) { - arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); - } - /* - * Leak extent after making sure its pages have already been purged, so - * that this is only a virtual memory leak. 
- */ - if (ecache->state == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, - growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, - edata_size_get(edata), growing_retained); - } - } - edata_cache_put(tsdn, &arena->edata_cache, edata); -} - -static void -extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { - assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == extent_state_active); - - edata_state_set(edata, ecache->state); - eset_insert(&ecache->eset, edata); -} - -static void -extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { - malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); -} - -static void -extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { - assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == ecache->state); - - eset_remove(&ecache->eset, edata); - edata_state_set(edata, extent_state_active); -} - -static bool -extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const edata_t *edata, bool dependent, bool init_missing, - rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) { - return true; - } - assert(*r_elm_a != NULL); - - *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_last_get(edata), dependent, init_missing); - if (!dependent && *r_elm_b == NULL) { - return true; - } - assert(*r_elm_b != NULL); - - return false; -} - -static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); - if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, - slab); - } -} - -static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, - szind_t szind) { - assert(edata_slab_get(edata)); - - /* Register interior. */ - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), edata, szind, true); - } -} - -static void -extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { - cassert(config_prof); - /* prof_gdump() requirement. */ - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (opt_prof && edata_state_get(edata) == extent_state_active) { - size_t nadd = edata_size_get(edata) >> LG_PAGE; - size_t cur = atomic_fetch_add_zu(&curpages, nadd, - ATOMIC_RELAXED) + nadd; - size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); - while (cur > high && !atomic_compare_exchange_weak_zu( - &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { - /* - * Don't refresh cur, because it may have decreased - * since this thread lost the highpages update race. - * Note that high is updated in case of CAS failure. 
- */ - } - if (cur > high && prof_gdump_get_unlocked()) { - prof_gdump(tsdn); - } - } -} - -static void -extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { - cassert(config_prof); - - if (opt_prof && edata_state_get(edata) == extent_state_active) { - size_t nsub = edata_size_get(edata) >> LG_PAGE; - assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); - atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); - } -} - -static bool -extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - - /* - * We need to hold the lock to protect against a concurrent coalesce - * operation that sees us in a partial state. - */ - extent_lock_edata(tsdn, edata); - - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, - &elm_a, &elm_b)) { - extent_unlock_edata(tsdn, edata); - return true; - } - - szind_t szind = edata_szind_get_maybe_invalid(edata); - bool slab = edata_slab_get(edata); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); - if (slab) { - extent_interior_register(tsdn, rtree_ctx, edata, szind); - } - - extent_unlock_edata(tsdn, edata); - - if (config_prof && gdump_add) { - extent_gdump_add(tsdn, edata); - } - - return false; -} - -static bool -extent_register(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, true); -} - -static bool -extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, false); -} - -static void -extent_reregister(tsdn_t *tsdn, edata_t *edata) { - bool err = extent_register(tsdn, edata); - assert(!err); -} - -/* - * Removes all pointers to the given extent from the global rtree indices for - * its interior. This is relevant for slab extents, for which we need to do - * metadata lookups at places other than the head of the extent. We deregister - * on the interior, then, when an extent moves from being an active slab to an - * inactive state. - */ -static void -extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - edata_t *edata) { - size_t i; - - assert(edata_slab_get(edata)); - - for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE)); - } -} - -/* - * Removes all pointers to the given extent from the global rtree. - */ -static void -extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, - &elm_a, &elm_b); - - extent_lock_edata(tsdn, edata); - - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); - if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); - edata_slab_set(edata, false); - } - - extent_unlock_edata(tsdn, edata); - - if (config_prof && gdump) { - extent_gdump_sub(tsdn, edata); - } -} - -static void -extent_deregister(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, true); -} - -static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, false); -} - -/* - * Tries to find and remove an extent from ecache that can be used for the - * given allocation request. 
- */ -static edata_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(alignment > 0); - if (config_debug && new_addr != NULL) { - /* - * Non-NULL new_addr has two use cases: - * - * 1) Recycle a known-extant extent, e.g. during purging. - * 2) Perform in-place expanding reallocation. - * - * Regardless of use case, new_addr must either refer to a - * non-existing extent, or to the base of an extant extent, - * since only active slabs support interior lookups (which of - * course cannot be recycled). - */ - assert(PAGE_ADDR2BASE(new_addr) == new_addr); - assert(pad == 0); - assert(alignment <= PAGE); - } - - size_t esize = size + pad; - malloc_mutex_lock(tsdn, &ecache->mtx); - edata_t *edata; - if (new_addr != NULL) { - edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, - false); - if (edata != NULL) { - /* - * We might null-out edata to report an error, but we - * still need to unlock the associated mutex after. - */ - edata_t *unlock_edata = edata; - assert(edata_base_get(edata) == new_addr); - if (edata_arena_ind_get(edata) != arena_ind_get(arena) - || edata_size_get(edata) < esize - || edata_state_get(edata) - != ecache->state) { - edata = NULL; - } - extent_unlock_edata(tsdn, unlock_edata); - } - } else { - edata = eset_fit(&ecache->eset, esize, alignment, - ecache->delay_coalesce); - } - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &ecache->mtx); - return NULL; - } - - extent_activate_locked(tsdn, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); - - return edata; -} - -/* - * Given an allocation request and an extent guaranteed to be able to satisfy - * it, this splits off lead and trail extents, leaving edata pointing to an - * extent satisfying the allocation. - * This function doesn't put lead or trail into any ecache; it's the caller's - * job to ensure that they can be reused. - */ -typedef enum { - /* - * Split successfully. lead, edata, and trail, are modified to extents - * describing the ranges before, in, and after the given allocation. - */ - extent_split_interior_ok, - /* - * The extent can't satisfy the given allocation request. None of the - * input edata_t *s are touched. - */ - extent_split_interior_cant_alloc, - /* - * In a potentially invalid state. Must leak (if *to_leak is non-NULL), - * and salvage what's still salvageable (if *to_salvage is non-NULL). - * None of lead, edata, or trail are valid. - */ - extent_split_interior_error -} extent_split_interior_result_t; - -static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, - /* The result of splitting, in case of success. */ - edata_t **edata, edata_t **lead, edata_t **trail, - /* The mess to clean up, in case of error. 
*/ - edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool growing_retained) { - size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), - PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); - assert(new_addr == NULL || leadsize == 0); - if (edata_size_get(*edata) < leadsize + esize) { - return extent_split_interior_cant_alloc; - } - size_t trailsize = edata_size_get(*edata) - leadsize - esize; - - *lead = NULL; - *trail = NULL; - *to_leak = NULL; - *to_salvage = NULL; - - /* Split the lead. */ - if (leadsize != 0) { - *lead = *edata; - *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, - slab, growing_retained); - if (*edata == NULL) { - *to_leak = *lead; - *lead = NULL; - return extent_split_interior_error; - } - } - - /* Split the trail. */ - if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *edata, esize, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); - if (*trail == NULL) { - *to_leak = *edata; - *to_salvage = *lead; - *lead = NULL; - *edata = NULL; - return extent_split_interior_error; - } - } - - if (leadsize == 0 && trailsize == 0) { - /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. - */ - edata_szind_set(*edata, szind); - if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_addr_get(*edata), szind, slab); - if (slab && edata_size_get(*edata) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)edata_past_get(*edata) - - (uintptr_t)PAGE, szind, slab); - } - } - } - - return extent_split_interior_ok; -} - -/* - * This fulfills the indicated allocation request out of the given extent (which - * the caller should have ensured was big enough). If there's any unused space - * before or after the resulting allocation, that space is given its own extent - * and put back into ecache. - */ -static edata_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, - bool growing_retained) { - edata_t *lead; - edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; - - extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, - &to_salvage, new_addr, size, pad, alignment, slab, szind, - growing_retained); - - if (!maps_coalesce && result != extent_split_interior_ok - && !opt_retain) { - /* - * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the extent. - */ - assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, ecache, to_leak); - return NULL; - } - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_deactivate(tsdn, ecache, lead); - } - if (trail != NULL) { - extent_deactivate(tsdn, ecache, trail); - } - return edata; - } else { - /* - * We should have picked an extent that was large enough to - * fulfill our allocation request. 
- */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - extent_deregister(tsdn, to_salvage); - } - if (to_leak != NULL) { - void *leak = edata_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, - growing_retained); - assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, - false) == NULL); - } - return NULL; - } - unreachable(); -} - -/* - * Tries to satisfy the given allocation request by reusing one of the extents - * in the given ecache_t. - */ -static edata_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, ecache, new_addr, size, pad, alignment, slab, - growing_retained); - if (edata == NULL) { - return NULL; - } - - edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, - new_addr, size, pad, alignment, slab, szind, edata, - growing_retained); - if (edata == NULL) { - return NULL; - } - - if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), growing_retained)) { - extent_record(tsdn, arena, ehooks, ecache, edata, - growing_retained); - return NULL; - } - } - - if (edata_committed_get(edata)) { - *commit = true; - } - if (edata_zeroed_get(edata)) { - *zero = true; - } - - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } - assert(edata_state_get(edata) == extent_state_active); - if (slab) { - edata_slab_set(edata, slab); - extent_interior_register(tsdn, rtree_ctx, edata, szind); - } - - if (*zero) { - void *addr = edata_base_get(edata); - if (!edata_zeroed_get(edata)) { - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - } - } - return edata; -} - -/* - * If virtual memory is retained, create increasingly larger extents from which - * to split requested extents in order to limit the total number of disjoint - * virtual memory ranges retained by each arena. - */ -static edata_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - size_t esize = size + pad; - size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size_min < esize) { - goto label_err; - } - /* - * Find the next extent size in the series that would be large enough to - * satisfy this request. - */ - pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); - while (alloc_size < alloc_size_min) { - egn_skip++; - if (arena->ecache_grow.next + egn_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. 
*/ - goto label_err; - } - alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); - } - - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); - if (edata == NULL) { - goto label_err; - } - bool zeroed = false; - bool committed = false; - - void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed); - - edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); - if (ptr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - goto label_err; - } - - if (extent_register_no_gdump_add(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - goto label_err; - } - - if (edata_zeroed_get(edata) && edata_committed_get(edata)) { - *zero = true; - } - if (edata_committed_get(edata)) { - *commit = true; - } - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *lead; - edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; - extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, - &to_salvage, NULL, size, pad, alignment, slab, szind, true); - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, lead, true); - } - if (trail != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, trail, true); - } - } else { - /* - * We should have allocated a sufficiently large extent; the - * cant_alloc case should not occur. - */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - if (config_prof) { - extent_gdump_add(tsdn, to_salvage); - } - extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, to_salvage, true); - } - if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, - &arena->ecache_retained, to_leak, true); - } - goto label_err; - } - - if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), true)) { - extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, edata, true); - goto label_err; - } - /* A successful commit should return zeroed memory. */ - if (config_debug) { - void *addr = edata_addr_get(edata); - size_t *p = (size_t *)(uintptr_t)addr; - /* Check the first page only. */ - for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { - assert(p[i] == 0); - } - } - } - - /* - * Increment extent_grow_next if doing so wouldn't exceed the allowed - * range. - */ - if (arena->ecache_grow.next + egn_skip + 1 <= - arena->ecache_grow.limit) { - arena->ecache_grow.next += egn_skip + 1; - } else { - arena->ecache_grow.next = arena->ecache_grow.limit; - } - /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); - - if (config_prof) { - /* Adjust gdump stats now that extent is final size. 
*/ - extent_gdump_add(tsdn, edata); - } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } - if (slab) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - edata_slab_set(edata, true); - extent_interior_register(tsdn, rtree_ctx, edata, szind); - } - if (*zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - } - - return edata; -label_err: - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); - return NULL; -} - -static edata_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size != 0); - assert(alignment != 0); - - malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); - - edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->ecache_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit, true); - if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); - if (config_prof) { - extent_gdump_add(tsdn, edata); - } - } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, - alignment, slab, szind, zero, commit); - /* extent_grow_retained() always releases extent_grow_mtx. */ - } else { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); - } - malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); - - return edata; -} - -edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - size_t esize = size + pad; - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); - if (edata == NULL) { - return NULL; - } - size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, - zero, commit); - if (addr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - return NULL; - } - edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true, EXTENT_NOT_HEAD); - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } - if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - return NULL; - } - - return edata; -} - -static bool -extent_can_coalesce(ecache_t *ecache, const edata_t *inner, - const edata_t *outer) { - assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); - - if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { - return false; - } - - assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != ecache->state) { - return false; - } - - if (edata_committed_get(inner) != edata_committed_get(outer)) { - return false; - } - - return true; -} - -static bool -extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, - bool growing_retained) { - assert(extent_can_coalesce(ecache, inner, outer)); - - extent_activate_locked(tsdn, ecache, outer); - - malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, ehooks, edata_cache, - forward ? inner : outer, forward ? 
outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &ecache->mtx); - - if (err) { - extent_deactivate_locked(tsdn, ecache, outer); - } - - return err; -} - -static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained, bool inactive_only) { - /* - * We avoid checking / locking inactive neighbors for large size - * classes, since they are eagerly coalesced on deallocation which can - * cause lock contention. - */ - /* - * Continue attempting to coalesce until failure, to protect against - * races with other threads that are thwarted by this one. - */ - bool again; - do { - again = false; - - /* Try to coalesce forward. */ - edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_past_get(edata), inactive_only); - if (next != NULL) { - /* - * ecache->mtx only protects against races for - * like-state extents, so call extent_can_coalesce() - * before releasing next's pool lock. - */ - bool can_coalesce = extent_can_coalesce(ecache, - edata, next); - - extent_unlock_edata(tsdn, next); - - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, next, true, - growing_retained)) { - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; - } - again = true; - } - } - - /* Try to coalesce backward. */ - edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_before_get(edata), inactive_only); - if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(ecache, edata, - prev); - extent_unlock_edata(tsdn, prev); - - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, prev, false, - growing_retained)) { - edata = prev; - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; - } - again = true; - } - } - } while (again); - - if (ecache->delay_coalesce) { - *coalesced = false; - } - return edata; -} - -static edata_t * -extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, growing_retained, false); -} - -static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, growing_retained, true); -} - -/* - * Does the metadata management portions of putting an unused extent into the - * given ecache_t (coalesces, deregisters slab interiors, the heap operations). 
- */ -static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool growing_retained) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - assert((ecache->state != extent_state_dirty && - ecache->state != extent_state_muzzy) || - !edata_zeroed_get(edata)); - - malloc_mutex_lock(tsdn, &ecache->mtx); - - edata_szind_set(edata, SC_NSIZES); - if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); - edata_slab_set(edata, false); - } - - assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), true) == edata); - - if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, - rtree_ctx, ecache, edata, NULL, growing_retained); - } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &arena->ecache_dirty); - /* Always coalesce large extents eagerly. */ - bool coalesced; - do { - assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, - &arena->edata_cache, ehooks, rtree_ctx, ecache, - edata, &coalesced, growing_retained); - } while (coalesced); - if (edata_size_get(edata) >= oversize_threshold) { - /* Shortcut to purge the oversize extent eagerly. */ - malloc_mutex_unlock(tsdn, &ecache->mtx); - arena_decay_extent(tsdn, arena, ehooks, edata); - return; - } - } - extent_deactivate_locked(tsdn, ecache, edata); - - malloc_mutex_unlock(tsdn, &ecache->mtx); -} - -void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - ehooks_t *ehooks = arena_get_ehooks(arena); - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - return; - } - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); -} - -static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { - bool err; - - assert(edata_base_get(edata) != NULL); - assert(edata_size_get(edata) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_addr_set(edata, edata_base_get(edata)); - - /* Try to deallocate. */ - err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata)); - - if (!err) { - edata_cache_put(tsdn, &arena->edata_cache, edata); - } - - return err; -} - -void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { - assert(edata_dumpable_get(edata)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Avoid calling the default extent_dalloc unless have to. */ - if (!ehooks_dalloc_will_fail(ehooks)) { - /* - * Deregister first to avoid a race with other allocating - * threads, and reregister if deallocation fails. - */ - extent_deregister(tsdn, edata); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { - return; - } - extent_reregister(tsdn, edata); - } - - /* Try to decommit; purge if that fails. 
*/ - bool zeroed; - if (!edata_committed_get(edata)) { - zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata))) { - zeroed = true; - } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { - zeroed = true; - } else if (edata_state_get(edata) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { - zeroed = false; - } else { - zeroed = false; - } - edata_zeroed_set(edata, zeroed); - - if (config_prof) { - extent_gdump_sub(tsdn, edata); - } - - extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, - false); -} - -void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { - assert(edata_base_get(edata) != NULL); - assert(edata_size_get(edata) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, edata); - - edata_addr_set(edata, edata_base_get(edata)); - - /* Try to destroy; silently fail otherwise. */ - ehooks_destroy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata)); - - edata_cache_put(tsdn, &arena->edata_cache, edata); -} - -static bool -extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length); - edata_committed_set(edata, edata_committed_get(edata) || !err); - return err; -} - -bool -extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { - return extent_commit_impl(tsdn, ehooks, edata, offset, length, - false); -} - -bool -extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length); - edata_committed_set(edata, edata_committed_get(edata) && err); - return err; -} - -static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length); - return err; -} - -bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, - length, false); -} - -static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length); - return err; -} - -bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, edata, - offset, length, false); -} - -/* - * Accepts the extent to split, and the characteristics of each side of the - * split. The 'a' parameters go with the 'lead' of the resulting pair of - * extents (the lower addressed portion of the split), and the 'b' parameters go - * with the trail (the higher addressed portion). This makes 'extent' the lead, - * and returns the trail (except in case of error). - */ -static edata_t * -extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { - assert(edata_size_get(edata) == size_a + size_b); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - - if (ehooks_split_will_fail(ehooks)) { - return NULL; - } - - edata_t *trail = edata_cache_get(tsdn, edata_cache); - if (trail == NULL) { - goto label_error_a; - } - - edata_init(trail, ehooks_ind_get(ehooks), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_dumpable_get(edata), EXTENT_NOT_HEAD); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; - { - edata_t lead; - - edata_init(&lead, ehooks_ind_get(ehooks), - edata_addr_get(edata), size_a, - slab_a, szind_a, edata_sn_get(edata), - edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), edata_dumpable_get(edata), - EXTENT_NOT_HEAD); - - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, - true, &lead_elm_a, &lead_elm_b); - } - rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b); - - if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL - || trail_elm_b == NULL) { - goto label_error_b; - } - - extent_lock_edata2(tsdn, edata, trail); - - bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), - size_a + size_b, size_a, size_b, edata_committed_get(edata)); - - if (err) { - goto label_error_c; - } - - edata_size_set(edata, size_a); - edata_szind_set(edata, szind_a); - - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, - szind_a, slab_a); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, - szind_b, slab_b); - - extent_unlock_edata2(tsdn, edata, trail); - - return trail; -label_error_c: - extent_unlock_edata2(tsdn, edata, trail); -label_error_b: - edata_cache_put(tsdn, edata_cache, trail); -label_error_a: - return NULL; -} - -edata_t * -extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, - szind_a, slab_a, size_b, szind_b, slab_b, false); -} - -static bool -extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b, bool growing_retained) { - 
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(edata_base_get(a) < edata_base_get(b)); - - assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); - assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); - - bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), - edata_size_get(a), edata_is_head_get(a), edata_base_get(b), - edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); - - if (err) { - return true; - } - - /* - * The rtree writes must happen while all the relevant elements are - * owned, so the following code uses decomposed helper functions rather - * than extent_{,de}register() to do things in the right order. - */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, - &a_elm_b); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, - &b_elm_b); - - extent_lock_edata2(tsdn, a, b); - - if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, - SC_NSIZES, false); - } - if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, - SC_NSIZES, false); - } else { - b_elm_b = b_elm_a; - } - - edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_szind_set(a, SC_NSIZES); - edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? - edata_sn_get(a) : edata_sn_get(b)); - edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, - false); - - extent_unlock_edata2(tsdn, a, b); - - edata_cache_put(tsdn, edata_cache, b); - - return false; -} - -bool -extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); -} - -bool -extent_boot(void) { - if (rtree_new(&extents_rtree, true)) { - return true; - } - - if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", - WITNESS_RANK_EXTENT_POOL)) { - return true; - } - - if (have_dss) { - extent_dss_boot(); - } - - return false; -} -- cgit v0.12 From f2f2084e79c3546b38fb635401588afdd0560392 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Dec 2019 17:15:57 -0800 Subject: Ehooks: Assert alloc isn't NULL --- src/ehooks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ehooks.c b/src/ehooks.c index 2fb2c4c..78c2834 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -6,6 +6,8 @@ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { + /* All other hooks are optional; this one is not. */ + assert(extent_hooks->alloc != NULL); ehooks->ind = ind; ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } -- cgit v0.12 From 6342da0970257187f5fcc9504301eba75f92ccca Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Dec 2019 17:53:52 -0800 Subject: Ehooks: Further optimize default merge case. This avoids the cost of an iealloc in cases where the user uses the default merge hook without using the default extent hooks. 
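As an illustration of the case being optimized (a sketch only, not part of
this change): an application can read an arena's current hook table via
mallctl, override just the hooks it cares about, and leave merge pointing at
the default implementation. The arena index (0), the my_alloc wrapper, and
install_hooks() below are hypothetical; the sketch assumes the public
extent_hooks_t / mallctl interface of jemalloc 5.x and omits error checking.

    #include <stdbool.h>
    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    /* Saved pointer to the arena's original (default) hook table. */
    static extent_hooks_t *orig_hooks;
    static extent_hooks_t my_hooks;

    /*
     * Wrap only the alloc hook; every other hook, including merge, keeps
     * its default value copied from the original table.
     */
    static void *
    my_alloc(extent_hooks_t *hooks, void *new_addr, size_t size,
        size_t alignment, bool *zero, bool *commit, unsigned arena_ind) {
        (void)hooks;
        /* (Hypothetical instrumentation would go here.) */
        return orig_hooks->alloc(orig_hooks, new_addr, size, alignment,
            zero, commit, arena_ind);
    }

    static void
    install_hooks(void) {
        size_t sz = sizeof(orig_hooks);
        /* Read the current (default) hook table for arena 0. */
        mallctl("arena.0.extent_hooks", (void *)&orig_hooks, &sz, NULL, 0);
        my_hooks = *orig_hooks;     /* merge et al. remain default... */
        my_hooks.alloc = my_alloc;  /* ...only alloc is overridden. */
        extent_hooks_t *newp = &my_hooks;
        mallctl("arena.0.extent_hooks", NULL, NULL, (void *)&newp,
            sizeof(newp));
    }

With such hooks installed, extent_hooks no longer equals
&ehooks_default_extent_hooks, but extent_hooks->merge still compares equal to
the default merge hook, so the check added below lets ehooks_merge() call
ehooks_default_merge_impl() directly instead of going through the generic
user-hook path, which would have to call iealloc and walk the rtree to
recover the extents' head state.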
--- include/jemalloc/internal/ehooks.h | 19 ++++++++++++++++++- src/ehooks.c | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 4d183e0..1bd44cb 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -54,6 +54,13 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); +/* + * Merge is the only default extent hook we declare -- see the comment in + * ehooks_merge. + */ +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b); void ehooks_default_zero_impl(void *addr, size_t size); @@ -333,7 +340,17 @@ static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &ehooks_default_extent_hooks) { + /* + * The definition of extent_hooks merge function doesn't know about + * extent head state, but the implementation does. As a result, it + * needs to call iealloc again and walk the rtree. Since the cost of an + * iealloc is large relative to the cost of the default merge hook + * (which on posix-likes is just "return false"), we go even further + * when we short-circuit; we don't just check if the extent hooks + * generally are default, we check if the merge hook specifically is. + */ + if (extent_hooks == &ehooks_default_extent_hooks + || extent_hooks->merge == &ehooks_default_merge) { return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } else if (extent_hooks->merge == NULL) { diff --git a/src/ehooks.c b/src/ehooks.c index 78c2834..667bee8 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -242,7 +242,7 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, return false; } -static bool +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); -- cgit v0.12 From ea42174d07c2cf496e407bfae74be866ee090b2f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Dec 2019 15:15:31 -0800 Subject: Refactor profiling headers --- include/jemalloc/internal/prof_data_externs.h | 22 ++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 32 ++++----------------------- include/jemalloc/internal/prof_log_externs.h | 17 ++++++++++++++ src/prof.c | 32 ++------------------------- src/prof_data.c | 31 ++++++++++++++++++++++++++ src/prof_log.c | 4 +++- test/unit/prof_log.c | 1 + 7 files changed, 80 insertions(+), 59 deletions(-) create mode 100644 include/jemalloc/internal/prof_data_externs.h create mode 100644 include/jemalloc/internal/prof_log_externs.h diff --git a/include/jemalloc/internal/prof_data_externs.h b/include/jemalloc/internal/prof_data_externs.h new file mode 100644 index 0000000..95dc6b0 --- /dev/null +++ b/include/jemalloc/internal/prof_data_externs.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t *gctx_locks; +extern malloc_mutex_t 
*tdata_locks; + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); + +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); + +#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index bd73a29..9ba363b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,9 +7,6 @@ extern malloc_mutex_t bt2gctx_mtx; extern malloc_mutex_t tdatas_mtx; extern malloc_mutex_t prof_dump_mtx; -malloc_mutex_t *prof_gctx_mutex_choose(void); -malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); - extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; @@ -48,12 +45,14 @@ extern bool prof_booted; bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); +/* Functions only accessed in prof_inlines_b.h */ +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); @@ -76,10 +75,6 @@ bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); -void prof_bt_hash(const void *key, size_t r_hash[2]); -bool prof_bt_keycomp(const void *k1, const void *k2); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); @@ -101,26 +96,7 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(tsd_t *tsd); -void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); -bool prof_log_init(tsd_t *tsdn); -#ifdef JEMALLOC_JET -size_t prof_log_bt_count(void); -size_t prof_log_alloc_count(void); -size_t prof_log_thr_count(void); -bool prof_log_is_logging(void); -bool prof_log_rep_check(void); -void prof_log_dummy_set(bool new_value); -#endif - -/* Functions in prof_data.c only used in profiling code. 
*/ -bool prof_data_init(tsd_t *tsd); -bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log_externs.h new file mode 100644 index 0000000..cde651b --- /dev/null +++ b/include/jemalloc/internal/prof_log_externs.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); +bool prof_log_init(tsd_t *tsdn); +#ifdef JEMALLOC_JET +size_t prof_log_bt_count(void); +size_t prof_log_alloc_count(void); +size_t prof_log_thr_count(void); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 3a72e9c..58839bc 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,8 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_data_externs.h" +#include "jemalloc/internal/prof_log_externs.h" #include "jemalloc/internal/thread_event.h" /* @@ -73,24 +75,6 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -/* - * Table of mutexes that are shared among gctx's. These are leaf locks, so - * there is no problem with using them for more than one gctx at the same time. - * The primary motivation for this sharing though is that gctx's are ephemeral, - * and destroying mutexes causes complications for systems that allocate when - * creating/destroying mutexes. - */ -static malloc_mutex_t *gctx_locks; -static atomic_u_t cum_gctxs; /* Atomic counter. */ - -/* - * Table of mutexes that are shared among tdata's. No operations require - * holding multiple tdata locks, so there is no problem with using them for more - * than one tdata at the same time, even though a gctx lock may be acquired - * while holding a tdata lock. - */ -static malloc_mutex_t *tdata_locks; - /* Non static to enable profiling. */ malloc_mutex_t bt2gctx_mtx; @@ -431,18 +415,6 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { post_reentrancy(tsd); } -malloc_mutex_t * -prof_gctx_mutex_choose(void) { - unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); - - return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; -} - -malloc_mutex_t * -prof_tdata_mutex_choose(uint64_t thr_uid) { - return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; -} - /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with diff --git a/src/prof_data.c b/src/prof_data.c index 8a2cc84..5c2b926 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof_data_externs.h" /* * This file defines and manages the core profiling data structures. @@ -26,6 +27,24 @@ /******************************************************************************/ /* + * Table of mutexes that are shared among gctx's. 
These are leaf locks, so + * there is no problem with using them for more than one gctx at the same time. + * The primary motivation for this sharing though is that gctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +malloc_mutex_t *gctx_locks; +static atomic_u_t cum_gctxs; /* Atomic counter. */ + +/* + * Table of mutexes that are shared among tdata's. No operations require + * holding multiple tdata locks, so there is no problem with using them for more + * than one tdata at the same time, even though a gctx lock may be acquired + * while holding a tdata lock. + */ +malloc_mutex_t *tdata_locks; + +/* * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. */ @@ -114,6 +133,18 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ +static malloc_mutex_t * +prof_gctx_mutex_choose(void) { + unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); + + return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; +} + +static malloc_mutex_t * +prof_tdata_mutex_choose(uint64_t thr_uid) { + return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; +} + bool prof_data_init(tsd_t *tsd) { tdata_tree_new(&tdatas); diff --git a/src/prof_log.c b/src/prof_log.c index 2904f0c..6ac81e0 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -4,10 +4,12 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/prof_data_externs.h" +#include "jemalloc/internal/prof_log_externs.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 9336ebc..e816d4e 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -1,4 +1,5 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_log_externs.h" #define N_PARAM 100 #define N_THREADS 10 -- cgit v0.12 From 112dc36dd5cf3fc24e1bd9beda61b48cb1d6e9e3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Dec 2019 10:38:05 -0800 Subject: Handle log_mtx during forking --- include/jemalloc/internal/prof_log_externs.h | 2 ++ src/prof.c | 3 +++ src/prof_log.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log_externs.h index cde651b..c8cc5a3 100644 --- a/include/jemalloc/internal/prof_log_externs.h +++ b/include/jemalloc/internal/prof_log_externs.h @@ -3,6 +3,8 @@ #include "jemalloc/internal/mutex.h" +extern malloc_mutex_t log_mtx; + void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET diff --git a/src/prof.c b/src/prof.c index 58839bc..f35bba9 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1119,6 +1119,7 @@ prof_prefork0(tsdn_t *tsdn) { for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_prefork(tsdn, &tdata_locks[i]); } + malloc_mutex_prefork(tsdn, &log_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_prefork(tsdn, &gctx_locks[i]); } @@ -1150,6 +1151,7 @@ prof_postfork_parent(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } + 
malloc_mutex_postfork_parent(tsdn, &log_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); } @@ -1172,6 +1174,7 @@ prof_postfork_child(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } + malloc_mutex_postfork_child(tsdn, &log_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); } diff --git a/src/prof_log.c b/src/prof_log.c index 6ac81e0..9411b98 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -104,7 +104,7 @@ static prof_alloc_node_t *log_alloc_first = NULL; static prof_alloc_node_t *log_alloc_last = NULL; /* Protects the prof_logging_state and any log_{...} variable. */ -static malloc_mutex_t log_mtx; +malloc_mutex_t log_mtx; /******************************************************************************/ /* -- cgit v0.12 From 3fa142cf394d39f36d4bf7564251071f13527e4f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Sun, 22 Dec 2019 20:02:28 -0800 Subject: Remove _externs from prof internal header names --- include/jemalloc/internal/prof_data.h | 22 ++++++++++++++++++++++ include/jemalloc/internal/prof_data_externs.h | 22 ---------------------- include/jemalloc/internal/prof_log.h | 19 +++++++++++++++++++ include/jemalloc/internal/prof_log_externs.h | 19 ------------------- src/prof.c | 4 ++-- src/prof_data.c | 2 +- src/prof_log.c | 4 ++-- test/unit/prof_log.c | 2 +- 8 files changed, 47 insertions(+), 47 deletions(-) create mode 100644 include/jemalloc/internal/prof_data.h delete mode 100644 include/jemalloc/internal/prof_data_externs.h create mode 100644 include/jemalloc/internal/prof_log.h delete mode 100644 include/jemalloc/internal/prof_log_externs.h diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h new file mode 100644 index 0000000..95dc6b0 --- /dev/null +++ b/include/jemalloc/internal/prof_data.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t *gctx_locks; +extern malloc_mutex_t *tdata_locks; + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); + +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); + +#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_data_externs.h b/include/jemalloc/internal/prof_data_externs.h deleted file mode 100644 index 95dc6b0..0000000 --- a/include/jemalloc/internal/prof_data_externs.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H - -#include "jemalloc/internal/mutex.h" - -extern malloc_mutex_t *gctx_locks; -extern malloc_mutex_t *tdata_locks; - -void prof_bt_hash(const void *key, size_t r_hash[2]); -bool prof_bt_keycomp(const void *k1, const void *k2); - -bool prof_data_init(tsd_t *tsd); -bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t 
thr_uid, - uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); -void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); - -#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h new file mode 100644 index 0000000..c8cc5a3 --- /dev/null +++ b/include/jemalloc/internal/prof_log.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t log_mtx; + +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); +bool prof_log_init(tsd_t *tsdn); +#ifdef JEMALLOC_JET +size_t prof_log_bt_count(void); +size_t prof_log_alloc_count(void); +size_t prof_log_thr_count(void); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log_externs.h deleted file mode 100644 index c8cc5a3..0000000 --- a/include/jemalloc/internal/prof_log_externs.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H - -#include "jemalloc/internal/mutex.h" - -extern malloc_mutex_t log_mtx; - -void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); -bool prof_log_init(tsd_t *tsdn); -#ifdef JEMALLOC_JET -size_t prof_log_bt_count(void); -size_t prof_log_alloc_count(void); -size_t prof_log_thr_count(void); -bool prof_log_is_logging(void); -bool prof_log_rep_check(void); -void prof_log_dummy_set(bool new_value); -#endif - -#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index f35bba9..33b6819 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,8 +5,8 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_data_externs.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/thread_event.h" /* diff --git a/src/prof_data.c b/src/prof_data.c index 5c2b926..690070e 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" -#include "jemalloc/internal/prof_data_externs.h" +#include "jemalloc/internal/prof_data.h" /* * This file defines and manages the core profiling data structures. 
diff --git a/src/prof_log.c b/src/prof_log.c index 9411b98..11de436 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -8,8 +8,8 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_data_externs.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index e816d4e..4b14fd5 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_log.h" #define N_PARAM 100 #define N_THREADS 10 -- cgit v0.12 From e98ddf7987b8e9556c269ca0829f438151b124b7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Dec 2019 11:30:23 -0800 Subject: Fix unlikely condition in arena_prof_info_get() --- include/jemalloc/internal/arena_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6dacab3..28f2e97 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -49,7 +49,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (alloc_ctx == NULL) { edata = iealloc(tsd_tsdn(tsd), ptr); is_slab = edata_slab_get(edata); - } else if (!unlikely(is_slab = alloc_ctx->slab)) { + } else if (unlikely(!(is_slab = alloc_ctx->slab))) { edata = iealloc(tsd_tsdn(tsd), ptr); } -- cgit v0.12 From 7a27a05940d8eb0afc6ddbe32b420ce9e1452b91 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 26 Dec 2019 15:28:04 -0800 Subject: Delete tdata states used for cleanup --- include/jemalloc/internal/prof_inlines_b.h | 2 +- include/jemalloc/internal/prof_types.h | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 193ede7..186446b 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -88,7 +88,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { } prof_tdata_t *tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { + if (unlikely(tdata == NULL)) { return true; } diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 7a34385..ad095da 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -46,14 +46,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NTDATA_LOCKS 256 -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - /* Minimize memory bloat for non-prof builds. 
*/ #ifdef JEMALLOC_PROF #define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) -- cgit v0.12 From 9a60cf54ec4b825a692330a1c56932fa1b121e27 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Dec 2019 13:38:14 -0800 Subject: Last-N profiling mode --- Makefile.in | 2 + include/jemalloc/internal/arena_inlines_b.h | 16 +- include/jemalloc/internal/edata.h | 53 +- include/jemalloc/internal/large_externs.h | 3 +- include/jemalloc/internal/nstime.h | 10 +- include/jemalloc/internal/prof_externs.h | 9 + include/jemalloc/internal/prof_inlines_b.h | 20 +- include/jemalloc/internal/prof_recent.h | 16 + include/jemalloc/internal/prof_structs.h | 22 +- include/jemalloc/internal/prof_types.h | 4 + include/jemalloc/internal/witness.h | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 45 +- src/extent.c | 2 + src/jemalloc.c | 42 +- src/large.c | 20 +- src/prof.c | 16 +- src/prof_data.c | 15 +- src/prof_recent.c | 553 +++++++++++++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_recent.c | 391 +++++++++++++++ test/unit/prof_recent.sh | 5 + test/unit/prof_reset.sh | 2 +- 26 files changed, 1215 insertions(+), 41 deletions(-) create mode 100644 include/jemalloc/internal/prof_recent.h create mode 100644 src/prof_recent.c create mode 100644 test/unit/prof_recent.c create mode 100644 test/unit/prof_recent.sh diff --git a/Makefile.in b/Makefile.in index 40ba7f2..ad54720 100644 --- a/Makefile.in +++ b/Makefile.in @@ -126,6 +126,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ + $(srcroot)src/prof_recent.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ @@ -216,6 +217,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ + $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 28f2e97..a310eb2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -37,12 +37,12 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_info_t *prof_info) { + prof_info_t *prof_info, bool reset_recent) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); - const edata_t *edata; + edata_t *edata = NULL; bool is_slab; /* Static check. */ @@ -55,10 +55,14 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ - large_prof_info_get(edata, prof_info); + assert(edata != NULL); + large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { - memset(prof_info, 0, sizeof(prof_info_t)); prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + /* + * No need to set other fields in prof_info; they will never be + * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U. 
+ */ } } @@ -92,11 +96,9 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { cassert(config_prof); - assert(ptr != NULL); - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); assert(!edata_slab_get(edata)); large_prof_info_set(edata, tctx); } diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 86f5ac5..2a81bdc 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -25,6 +25,20 @@ enum extent_head_state_e { }; typedef enum extent_head_state_e extent_head_state_t; +struct e_prof_info_s { + /* Time when this was allocated. */ + nstime_t e_prof_alloc_time; + /* Points to a prof_tctx_t. */ + atomic_p_t e_prof_tctx; + /* + * Points to a prof_recent_t for the allocation; NULL + * means the recent allocation record no longer exists. + * Protected by prof_recent_alloc_mtx. + */ + atomic_p_t e_prof_recent_alloc; +}; +typedef struct e_prof_info_s e_prof_info_t; + /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; typedef ql_head(edata_t) edata_list_t; @@ -186,12 +200,7 @@ struct edata_s { slab_data_t e_slab_data; /* Profiling data, used for large objects. */ - struct { - /* Time when this was allocated. */ - nstime_t e_alloc_time; - /* Points to a prof_tctx_t. */ - atomic_p_t e_prof_tctx; - }; + e_prof_info_t e_prof_info; }; }; @@ -333,12 +342,21 @@ edata_slab_data_get_const(const edata_t *edata) { return &edata->e_slab_data; } -static inline void -edata_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { - assert(prof_info != NULL); - prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( - &edata->e_prof_tctx, ATOMIC_ACQUIRE); - prof_info->alloc_time = edata->e_alloc_time; +static inline prof_tctx_t * +edata_prof_tctx_get(const edata_t *edata) { + return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline const nstime_t * +edata_prof_alloc_time_get(const edata_t *edata) { + return &edata->e_prof_info.e_prof_alloc_time; +} + +static inline prof_recent_t * +edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { + return (prof_recent_t *)atomic_load_p( + &edata->e_prof_info.e_prof_recent_alloc, ATOMIC_RELAXED); } static inline void @@ -457,12 +475,19 @@ edata_slab_set(edata_t *edata, bool slab) { static inline void edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { - atomic_store_p(&edata->e_prof_tctx, tctx, ATOMIC_RELEASE); + atomic_store_p(&edata->e_prof_info.e_prof_tctx, tctx, ATOMIC_RELEASE); } static inline void edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { - nstime_copy(&edata->e_alloc_time, t); + nstime_copy(&edata->e_prof_info.e_prof_alloc_time, t); +} + +static inline void +edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, + prof_recent_t *recent_alloc) { + atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc, + ATOMIC_RELAXED); } static inline bool diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index fe5e606..05e6c44 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,7 +22,8 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t 
large_salloc(tsdn_t *tsdn, const edata_t *edata); -void large_prof_info_get(const edata_t *edata, prof_info_t *prof_info); +void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, + bool reset_recent); void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index a3766ff..c4bee24 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -9,6 +9,8 @@ typedef struct { uint64_t ns; } nstime_t; +static const nstime_t zero = NSTIME_ZERO_INITIALIZER; + void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); @@ -35,8 +37,14 @@ bool nstime_init_update(nstime_t *time); JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { - static const nstime_t zero = NSTIME_ZERO_INITIALIZER; nstime_copy(time, &zero); } +JEMALLOC_ALWAYS_INLINE bool +nstime_equals_zero(nstime_t *time) { + int diff = nstime_compare(time, &zero); + assert(diff >= 0); + return diff == 0; +} + #endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 9ba363b..a07fd22 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -24,6 +24,10 @@ extern char opt_prof_prefix[ #endif 1]; +/* For recording recent allocations */ +extern ssize_t opt_prof_recent_alloc_max; +extern malloc_mutex_t prof_recent_alloc_mtx; + /* Accessed via prof_active_[gs]et{_unlocked,}(). */ extern bool prof_active; @@ -99,4 +103,9 @@ void prof_sample_threshold_update(tsd_t *tsd); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); +ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); +void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), + void *cbopaque); + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 186446b..9ea0454 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -46,7 +46,17 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, assert(ptr != NULL); assert(prof_info != NULL); - arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info); + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false); +} + +JEMALLOC_ALWAYS_INLINE void +prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, + alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { + cassert(config_prof); + assert(ptr != NULL); + assert(prof_info != NULL); + + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); } JEMALLOC_ALWAYS_INLINE void @@ -66,12 +76,12 @@ prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { cassert(config_prof); - assert(ptr != NULL); + assert(edata != NULL); assert((uintptr_t)tctx > (uintptr_t)1U); - arena_prof_info_set(tsd, ptr, tctx); + arena_prof_info_set(tsd, edata, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -190,7 +200,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { 
prof_info_t prof_info; - prof_info_get(tsd, ptr, alloc_ctx, &prof_info); + prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h new file mode 100644 index 0000000..d0e9e1e --- /dev/null +++ b/include/jemalloc/internal/prof_recent.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H + +bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize); +void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); +bool prof_recent_init(); +void edata_prof_recent_alloc_init(edata_t *edata); +#ifdef JEMALLOC_JET +prof_recent_t *prof_recent_alloc_begin(tsd_t *tsd); +prof_recent_t *prof_recent_alloc_end(tsd_t *tsd); +prof_recent_t *prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node); +prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 6223adc..59c0f4f 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_STRUCTS_H #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" @@ -55,6 +56,12 @@ struct prof_tctx_s { uint64_t thr_uid; uint64_t thr_discrim; + /* + * Reference count of how many times this tctx object is referenced in + * recent allocation / deallocation records, protected by tdata->lock. + */ + uint64_t recent_count; + /* Profiling counters, protected by tdata->lock. */ prof_cnt_t cnts; @@ -97,10 +104,10 @@ struct prof_tctx_s { typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { - /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; /* Time when the allocation was made. */ nstime_t alloc_time; + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *alloc_tctx; }; struct prof_gctx_s { @@ -201,4 +208,15 @@ struct prof_tdata_s { }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; +struct prof_recent_s { + nstime_t alloc_time; + nstime_t dalloc_time; + + prof_recent_t *next; + size_t usize; + prof_tctx_t *alloc_tctx; + edata_t *alloc_edata; /* NULL means allocation has been freed. */ + prof_tctx_t *dalloc_tctx; +}; + #endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ad095da..498962d 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -8,6 +8,7 @@ typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_info_s prof_info_t; typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_recent_s prof_recent_t; /* Option defaults. */ #ifdef JEMALLOC_PROF @@ -53,4 +54,7 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_DUMP_FILENAME_LEN 1 #endif +/* Default number of recent allocations to record. 
*/ +#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 985e0a3..4ed787a 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -61,6 +61,7 @@ #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF /******************************************************************************/ /* PER-WITNESS DATA */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 5879090..f9af3dd 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 3551ba5..90f8831 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 631de57..4ca484a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 3551ba5..90f8831 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index eee1277..5a467d5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,6 +113,7 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -232,6 +233,7 @@ CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) +CTL_PROTO(experimental_prof_recent_alloc_max) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -343,6 +345,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -620,10 +623,15 @@ static const ctl_indexed_node_t experimental_arenas_node[] = { {INDEX(experimental_arenas_i)} }; +static const ctl_named_node_t experimental_prof_recent_node[] = { + {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, +}; + static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, - {NAME("arenas"), CHILD(indexed, experimental_arenas)} + {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("prof_recent"), CHILD(named, experimental_prof_recent)} }; static const ctl_named_node_t root_node[] = { @@ -1791,6 +1799,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) 
CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, + opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) @@ -3461,3 +3471,36 @@ label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return ret; } + +static int +experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!(config_prof && opt_prof)) { + ret = ENOENT; + goto label_return; + } + + ssize_t old_max; + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + ssize_t max = *(ssize_t *)newp; + if (max < -1) { + ret = EINVAL; + goto label_return; + } + old_max = prof_recent_alloc_max_ctl_write(tsd, max); + } else { + old_max = prof_recent_alloc_max_ctl_read(); + } + READ(old_max, ssize_t); + + ret = 0; + +label_return: + return ret; +} diff --git a/src/extent.c b/src/extent.c index 8d78f95..54f1499 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1562,6 +1562,8 @@ extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, bool extent_boot(void) { + assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); + if (rtree_new(&extents_rtree, true)) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 825a8ed..7184cbb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1402,6 +1402,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, + "prof_recent_alloc_max", -1, SSIZE_MAX) } if (config_log) { if (CONF_MATCH("log")) { @@ -3015,7 +3017,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; - prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info); + prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; @@ -3265,8 +3267,13 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { + /* + * old_prof_info is only used for asserting that the profiling info + * isn't changed by the ixalloc() call. + */ prof_info_t old_prof_info; prof_info_get(tsd, ptr, alloc_ctx, &old_prof_info); + /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in @@ -3315,13 +3322,26 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ thread_event(tsd, usize - usize_max); } + + /* + * At this point we can still safely get the original profiling + * information associated with the ptr, because (a) the edata_t object + * associated with the ptr still lives and (b) the profiling info + * fields are not touched. 
"(a)" is asserted in the outer je_xallocx() + * function, and "(b)" is indirectly verified below by checking that + * the alloc_tctx field is unchanged. + */ + prof_info_t prof_info; if (usize == old_usize) { + prof_info_get(tsd, ptr, alloc_ctx, &prof_info); prof_alloc_rollback(tsd, tctx, false); - return usize; + } else { + prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, + old_usize, &prof_info); } - prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize, - &old_prof_info); + assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx); return usize; } @@ -3342,6 +3362,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); + /* + * old_edata is only for verifying that xallocx() keeps the edata_t + * object associated with the ptr (though the content of the edata_t + * object can be changed). + */ + edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr); + alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, @@ -3374,6 +3401,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { extra, alignment, zero); thread_event(tsd, usize); } + + /* + * xallocx() should keep using the same edata_t object (though its + * content can be changed). + */ + assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); + if (unlikely(usize == old_usize)) { thread_event_rollback(tsd, usize); goto label_not_resized; diff --git a/src/large.c b/src/large.c index 5ca09f6..ca35fc5 100644 --- a/src/large.c +++ b/src/large.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" @@ -368,8 +369,22 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) { } void -large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { - edata_prof_info_get(edata, prof_info); +large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, + bool reset_recent) { + assert(prof_info != NULL); + nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); + + prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); + prof_info->alloc_tctx = alloc_tctx; + + if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) { + /* + * This allocation was a prof sample. Reset the pointer on the + * recent allocation record, so that this allocation is + * recorded as released. 
+ */ + prof_recent_alloc_reset(tsd, edata); + } } static void @@ -388,4 +403,5 @@ large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { nstime_t t; nstime_init_update(&t); edata_prof_alloc_time_set(edata, &t); + edata_prof_recent_alloc_init(edata); } diff --git a/src/prof.c b/src/prof.c index 33b6819..159600e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/thread_event.h" /* @@ -146,7 +147,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_info_set(tsd, ptr, tctx); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; @@ -155,8 +157,13 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.accumobjs++; tctx->cnts.accumbytes += usize; } + bool record_recent = prof_recent_alloc_prepare(tsd, tctx); tctx->prepared = false; malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + if (record_recent) { + assert(tctx == edata_prof_tctx_get(edata)); + prof_recent_alloc(tsd, edata, usize); + } } void @@ -1068,6 +1075,10 @@ prof_boot2(tsd_t *tsd) { return true; } + if (prof_recent_init()) { + return true; + } + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); @@ -1134,6 +1145,7 @@ prof_prefork1(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &prof_gdump_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); } } @@ -1142,6 +1154,7 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; + malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); @@ -1166,6 +1179,7 @@ prof_postfork_child(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; + malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); diff --git a/src/prof_data.c b/src/prof_data.c index 690070e..dfc507f 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -378,6 +378,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { ret.p->tdata = tdata; ret.p->thr_uid = tdata->thr_uid; ret.p->thr_discrim = tdata->thr_discrim; + ret.p->recent_count = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->tctx_uid = tdata->tctx_uid_next++; @@ -405,8 +406,15 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { prof_tctx_t * prof_tctx_create(tsd_t *tsd) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - assert(tdata != NULL); + if (tsd_reentrancy_level_get(tsd) > 0) { + return NULL; + } + + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return NULL; + } + prof_bt_t bt; bt_init(&bt, tdata->vec); prof_backtrace(tsd, &bt); @@ -1417,6 +1425,9 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { if (tctx->prepared) { return false; } + if (tctx->recent_count != 0) { + return false; + } return true; } diff --git a/src/prof_recent.c b/src/prof_recent.c new file mode 100644 
index 0000000..98349ac --- /dev/null +++ b/src/prof_recent.c @@ -0,0 +1,553 @@ +#define JEMALLOC_PROF_RECENT_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_recent.h" + +#ifndef JEMALLOC_JET +# define STATIC_INLINE_IF_NOT_TEST static inline +#else +# define STATIC_INLINE_IF_NOT_TEST +#endif + +ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; +malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ +static atomic_zd_t prof_recent_alloc_max; +static ssize_t prof_recent_alloc_count = 0; +static prof_recent_t *prof_recent_alloc_dummy = NULL; + +static void +prof_recent_alloc_max_init() { + atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, + ATOMIC_RELAXED); +} + +static inline ssize_t +prof_recent_alloc_max_get_no_lock() { + return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED); +} + +static inline ssize_t +prof_recent_alloc_max_get(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return prof_recent_alloc_max_get_no_lock(); +} + +static inline ssize_t +prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + ssize_t old_max = prof_recent_alloc_max_get(tsd); + atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED); + return old_max; +} + +static inline void +increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + ++tctx->recent_count; + assert(tctx->recent_count > 0); +} + +bool +prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) { + assert(opt_prof && prof_booted); + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* + * Check whether last-N mode is turned on without trying to acquire the + * lock, so as to optimize for the following two scenarios: + * (1) Last-N mode is switched off; + * (2) Dumping, during which last-N mode is temporarily turned off so + * as not to block sampled allocations. + */ + if (prof_recent_alloc_max_get_no_lock() == 0) { + return false; + } + + /* + * Increment recent_count to hold the tctx so that it won't be gone + * even after tctx->tdata->lock is released. This acts as a + * "placeholder"; the real recording of the allocation requires a lock + * on prof_recent_alloc_mtx and is done in prof_recent_alloc (when + * tctx->tdata->lock has been released). 
+ */ + increment_recent_count(tsd, tctx); + return true; +} + +static void +decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(tctx != NULL); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + assert(tctx->recent_count > 0); + --tctx->recent_count; + prof_tctx_try_destroy(tsd, tctx); +} + +void +edata_prof_recent_alloc_init(edata_t *edata) { + edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); +} + +static inline prof_recent_t * +edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + return edata_prof_recent_alloc_get_dont_call_directly(edata); +} + +STATIC_INLINE_IF_NOT_TEST prof_recent_t * +edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *recent_alloc = + edata_prof_recent_alloc_get_no_lock(edata); + assert(recent_alloc == NULL || recent_alloc->alloc_edata == edata); + return recent_alloc; +} + +static prof_recent_t * +edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *old_recent_alloc = + edata_prof_recent_alloc_get(tsd, edata); + edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc); + return old_recent_alloc; +} + +static void +edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + assert(recent_alloc != NULL); + prof_recent_t *old_recent_alloc = + edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc); + assert(old_recent_alloc == NULL); + recent_alloc->alloc_edata = edata; +} + +static void +edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + assert(recent_alloc != NULL); + prof_recent_t *old_recent_alloc = + edata_prof_recent_alloc_update_internal(tsd, edata, NULL); + assert(old_recent_alloc == recent_alloc); + assert(edata == recent_alloc->alloc_edata); + recent_alloc->alloc_edata = NULL; +} + +/* + * This function should be called right before an allocation is released, so + * that the associated recent allocation record can contain the following + * information: + * (1) The allocation is released; + * (2) The time of the deallocation; and + * (3) The prof_tctx associated with the deallocation. + */ +void +prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { + /* + * Check whether the recent allocation record still exists without + * trying to acquire the lock. + */ + if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) { + return; + } + + prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd); + /* + * In case dalloc_tctx is NULL, e.g. due to OOM, we will not record the + * deallocation time / tctx, which is handled later, after we check + * again when holding the lock. + */ + + if (dalloc_tctx != NULL) { + malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock); + increment_recent_count(tsd, dalloc_tctx); + dalloc_tctx->prepared = false; + malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock); + } + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + /* Check again after acquiring the lock. 
 */
+ prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
+ if (recent != NULL) {
+ edata_prof_recent_alloc_reset(tsd, edata, recent);
+ assert(nstime_equals_zero(&recent->dalloc_time));
+ assert(recent->dalloc_tctx == NULL);
+ if (dalloc_tctx != NULL) {
+ nstime_update(&recent->dalloc_time);
+ recent->dalloc_tctx = dalloc_tctx;
+ }
+ } else if (dalloc_tctx != NULL) {
+ /* We lost the race - the allocation record was just gone. */
+ decrement_recent_count(tsd, dalloc_tctx);
+ }
+ malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+}
+
+static void
+prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ if (recent->alloc_edata != NULL) {
+ edata_prof_recent_alloc_reset(tsd, recent->alloc_edata, recent);
+ }
+}
+
+STATIC_INLINE_IF_NOT_TEST prof_recent_t *
+prof_recent_alloc_begin(tsd_t *tsd) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ assert(prof_recent_alloc_dummy != NULL);
+ return prof_recent_alloc_dummy->next;
+}
+
+STATIC_INLINE_IF_NOT_TEST prof_recent_t *
+prof_recent_alloc_end(tsd_t *tsd) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ assert(prof_recent_alloc_dummy != NULL);
+ return prof_recent_alloc_dummy;
+}
+
+STATIC_INLINE_IF_NOT_TEST prof_recent_t *
+prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ assert(prof_recent_alloc_dummy != NULL);
+ assert(node != NULL && node != prof_recent_alloc_dummy);
+ return node->next;
+}
+
+static bool
+prof_recent_alloc_is_empty(tsd_t *tsd) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ if (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)) {
+ assert(prof_recent_alloc_count == 0);
+ return true;
+ } else {
+ assert(prof_recent_alloc_count > 0);
+ return false;
+ }
+}
+
+static void
+prof_recent_alloc_assert_count(tsd_t *tsd) {
+ malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ if (config_debug) {
+ ssize_t count = 0;
+ prof_recent_t *n = prof_recent_alloc_begin(tsd);
+ while (n != prof_recent_alloc_end(tsd)) {
+ ++count;
+ n = prof_recent_alloc_next(tsd, n);
+ }
+ assert(count == prof_recent_alloc_count);
+ assert(prof_recent_alloc_max_get(tsd) == -1 ||
+ count <= prof_recent_alloc_max_get(tsd));
+ }
+}
+
+void
+prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) {
+ assert(edata != NULL);
+ prof_tctx_t *tctx = edata_prof_tctx_get(edata);
+
+ malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
+ malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
+ prof_recent_alloc_assert_count(tsd);
+
+ /*
+ * Reserve a new prof_recent_t node if needed. If needed, we release
+ * the prof_recent_alloc_mtx lock and allocate. Then, rather than
+ * immediately checking for OOM, we regain the lock and try to make use
+ * of the reserve node if needed. There are six scenarios:
+ *
+ * \ now | no need | need but OOMed | need and allocated
+ * later \ | | |
+ * ------------------------------------------------------------
+ * no need | (1) | (2) | (3)
+ * ------------------------------------------------------------
+ * need | (4) | (5) | (6)
+ *
+ * First, "(4)" never happens, because we don't release the lock in the
+ * middle if there's no need for a new node; in such cases "(1)" always
+ * takes place, which is trivial.
+ *
+ * Out of the remaining four scenarios, "(6)" is the common case and is
+ * trivial.
"(5)" is also trivial, in which case we'll rollback the + * effect of prof_recent_alloc_prepare() as expected. + * + * "(2)" / "(3)" occurs when the need for a new node is gone after we + * regain the lock. If the new node is successfully allocated, i.e. in + * the case of "(3)", we'll release it in the end; otherwise, i.e. in + * the case of "(2)", we do nothing - we're lucky that the OOM ends up + * doing no harm at all. + * + * Therefore, the only performance cost of the "release lock" -> + * "allocate" -> "regain lock" design is the "(3)" case, but it happens + * very rarely, so the cost is relatively small compared to the gain of + * not having to have the lock order of prof_recent_alloc_mtx above all + * the allocation locks. + */ + prof_recent_t *reserve = NULL; + if (prof_recent_alloc_max_get(tsd) == -1 || + prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { + assert(prof_recent_alloc_max_get(tsd) != 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd), + sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)), + false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), + true); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + } + + if (prof_recent_alloc_max_get(tsd) == 0) { + assert(prof_recent_alloc_is_empty(tsd)); + goto label_rollback; + } + + assert(prof_recent_alloc_dummy != NULL); + { + /* Fill content into the dummy node. */ + prof_recent_t *node = prof_recent_alloc_dummy; + node->usize = usize; + nstime_copy(&node->alloc_time, + edata_prof_alloc_time_get(edata)); + node->alloc_tctx = tctx; + edata_prof_recent_alloc_set(tsd, edata, node); + nstime_init_zero(&node->dalloc_time); + node->dalloc_tctx = NULL; + } + + prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx; + if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) { + /* If upper limit is reached, simply shift the dummy. */ + assert(prof_recent_alloc_max_get(tsd) != -1); + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_alloc_dummy = prof_recent_alloc_dummy->next; + old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx; + assert(old_alloc_tctx != NULL); + old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx; + prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy); + } else { + /* Otherwise use the new node as the dummy. */ + assert(prof_recent_alloc_max_get(tsd) == -1 || + prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); + if (reserve == NULL) { + goto label_rollback; + } + reserve->next = prof_recent_alloc_dummy->next; + prof_recent_alloc_dummy->next = reserve; + prof_recent_alloc_dummy = reserve; + reserve = NULL; + old_alloc_tctx = NULL; + old_dalloc_tctx = NULL; + ++prof_recent_alloc_count; + } + + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + if (reserve != NULL) { + idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + } + + /* + * Asynchronously handle the tctx of the old node, so that there's no + * simultaneous holdings of prof_recent_alloc_mtx and tdata->lock. + * In the worst case this may delay the tctx release but it's better + * than holding prof_recent_alloc_mtx for longer. 
+ */ + if (old_alloc_tctx != NULL) { + decrement_recent_count(tsd, old_alloc_tctx); + } + if (old_dalloc_tctx != NULL) { + decrement_recent_count(tsd, old_dalloc_tctx); + } + return; + +label_rollback: + assert(edata_prof_recent_alloc_get(tsd, edata) == NULL); + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + if (reserve != NULL) { + idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + } + decrement_recent_count(tsd, tctx); +} + +ssize_t +prof_recent_alloc_max_ctl_read() { + /* Don't bother to acquire the lock. */ + return prof_recent_alloc_max_get_no_lock(); +} + +ssize_t +prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + assert(max >= -1); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + + if (max == -1 || prof_recent_alloc_count <= max) { + /* Easy case - no need to alter the list. */ + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return old_max; + } + + prof_recent_t *begin = prof_recent_alloc_dummy->next; + /* For verification purpose only. */ + ssize_t count = prof_recent_alloc_count - max; + do { + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_t *node = prof_recent_alloc_dummy->next; + assert(node != prof_recent_alloc_dummy); + prof_recent_alloc_evict_edata(tsd, node); + prof_recent_alloc_dummy->next = node->next; + --prof_recent_alloc_count; + } while (prof_recent_alloc_count > max); + prof_recent_t *end = prof_recent_alloc_dummy->next; + assert(begin != end); + + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* + * Asynchronously handle the tctx of the to-be-deleted nodes, so that + * there's no simultaneous holdings of prof_recent_alloc_mtx and + * tdata->lock. In the worst case there can be slightly extra space + * overhead taken by these nodes, but the total number of nodes at any + * time is bounded by (max + sum(decreases)), where "max" means the + * most recent prof_recent_alloc_max and "sum(decreases)" means the + * sum of the deltas of all decreases in prof_recent_alloc_max in the + * past. This (max + sum(decreases)) value is completely transparent + * to and controlled by application. 
+ */ + do { + prof_recent_t *node = begin; + decrement_recent_count(tsd, node->alloc_tctx); + if (node->dalloc_tctx != NULL) { + decrement_recent_count(tsd, node->dalloc_tctx); + } + begin = node->next; + idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); + --count; + } while (begin != end); + assert(count == 0); + + return old_max; +} + +static void +dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { + char bt_buf[2 * sizeof(intptr_t) + 3]; + char *s = bt_buf; + assert(tctx != NULL); + prof_bt_t *bt = &tctx->gctx->bt; + for (size_t i = 0; i < bt->len; ++i) { + malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]); + emitter_json_value(emitter, emitter_type_string, &s); + } +} + +#define PROF_RECENT_PRINT_BUFSIZE 4096 +void +prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), + void *cbopaque) { + char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, + sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, + arena_get(tsd_tsdn(tsd), 0, false), true); + buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE - 1, 0}; + emitter_t emitter; + emitter_init(&emitter, emitter_output_json_compact, buffered_write_cb, + &buf_arg); + emitter_begin(&emitter); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + + /* + * Set prof_recent_alloc_max to 0 so that dumping won't block sampled + * allocations: the allocations can complete but will not be recorded. + */ + ssize_t max = prof_recent_alloc_max_update(tsd, 0); + + emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); + + emitter_json_array_kv_begin(&emitter, "recent_alloc"); + for (prof_recent_t *n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + emitter_json_object_begin(&emitter); + + emitter_json_kv(&emitter, "usize", emitter_type_size, + &n->usize); + bool released = n->alloc_edata == NULL; + emitter_json_kv(&emitter, "released", emitter_type_bool, + &released); + + emitter_json_kv(&emitter, "alloc_thread_uid", + emitter_type_uint64, &n->alloc_tctx->thr_uid); + uint64_t alloc_time_ns = nstime_ns(&n->alloc_time); + emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64, + &alloc_time_ns); + emitter_json_array_kv_begin(&emitter, "alloc_trace"); + dump_bt(&emitter, n->alloc_tctx); + emitter_json_array_end(&emitter); + + if (n->dalloc_tctx != NULL) { + assert(released); + emitter_json_kv(&emitter, "dalloc_thread_uid", + emitter_type_uint64, &n->dalloc_tctx->thr_uid); + assert(!nstime_equals_zero(&n->dalloc_time)); + uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time); + emitter_json_kv(&emitter, "dalloc_time", + emitter_type_uint64, &dalloc_time_ns); + emitter_json_array_kv_begin(&emitter, "dalloc_trace"); + dump_bt(&emitter, n->dalloc_tctx); + emitter_json_array_end(&emitter); + } else { + assert(nstime_equals_zero(&n->dalloc_time)); + } + + emitter_json_object_end(&emitter); + } + emitter_json_array_end(&emitter); + + max = prof_recent_alloc_max_update(tsd, max); + assert(max == 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + emitter_end(&emitter); + buf_writer_flush(&buf_arg); + idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); +} +#undef PROF_RECENT_PRINT_BUFSIZE + +bool +prof_recent_init() { + prof_recent_alloc_max_init(); + + if (malloc_mutex_init(&prof_recent_alloc_mtx, + "prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC, + malloc_mutex_rank_exclusive)) { + return true; + } + + 
assert(prof_recent_alloc_dummy == NULL); + prof_recent_alloc_dummy = (prof_recent_t *)iallocztm( + TSDN_NULL, sizeof(prof_recent_t), + sz_size2index(sizeof(prof_recent_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (prof_recent_alloc_dummy == NULL) { + return true; + } + prof_recent_alloc_dummy->next = prof_recent_alloc_dummy; + + return false; +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ebbaed7..d317b4a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -188,6 +188,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); + TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c new file mode 100644 index 0000000..e10ac3f --- /dev/null +++ b/test/unit/prof_recent.c @@ -0,0 +1,391 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/prof_recent.h" + +/* As specified in the shell script */ +#define OPT_ALLOC_MAX 3 + +/* Invariant before and after every test (when config_prof is on) */ +static void confirm_prof_setup(tsd_t *tsd) { + /* Options */ + assert_true(opt_prof, "opt_prof not on"); + assert_true(opt_prof_active, "opt_prof_active not on"); + assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + "opt_prof_recent_alloc_max not set correctly"); + + /* Dynamics */ + assert_true(prof_active, "prof_active not on"); + assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + "prof_recent_alloc_max not set correctly"); +} + +TEST_BEGIN(test_confirm_setup) { + test_skip_if(!config_prof); + confirm_prof_setup(tsd_fetch()); +} +TEST_END + +TEST_BEGIN(test_prof_recent_off) { + test_skip_if(config_prof); + + const ssize_t past_ref = 0, future_ref = 0; + const size_t len_ref = sizeof(ssize_t); + + ssize_t past = past_ref, future = future_ref; + size_t len = len_ref; + +#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ + assert_d_eq(mallctl("experimental.prof_recent." 
opt, a, b, c, \ + d), ENOENT, "Should return ENOENT when config_prof is off");\ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ +} while (0) + + ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); + ASSERT_SHOULD_FAIL("alloc_max", &past, &len, NULL, 0); + ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, &future, len); + ASSERT_SHOULD_FAIL("alloc_max", &past, &len, &future, len); + +#undef ASSERT_SHOULD_FAIL +} +TEST_END + +TEST_BEGIN(test_prof_recent_on) { + test_skip_if(!config_prof); + + ssize_t past, future; + size_t len = sizeof(ssize_t); + + tsd_t *tsd = tsd_fetch(); + + confirm_prof_setup(tsd); + + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); + confirm_prof_setup(tsd); + + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, NULL, 0), 0, "Read error"); + assert_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); + future = OPT_ALLOC_MAX + 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, len), 0, "Write error"); + future = -1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), 0, "Read/write error"); + assert_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); + future = -2; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), EINVAL, + "Invalid write should return EINVAL"); + assert_zd_eq(past, OPT_ALLOC_MAX + 1, + "Output should not be touched given invalid write"); + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), 0, "Read/write error"); + assert_zd_eq(past, -1, "Wrong read result"); + future = OPT_ALLOC_MAX + 2; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len * 2), EINVAL, + "Invalid write should return EINVAL"); + assert_zd_eq(past, -1, + "Output should not be touched given invalid write"); + + confirm_prof_setup(tsd); +} +TEST_END + +/* Reproducible sequence of request sizes */ +#define NTH_REQ_SIZE(n) ((n) * 97 + 101) + +static void confirm_malloc(tsd_t *tsd, void *p) { + assert_ptr_not_null(p, "malloc failed unexpectedly"); + edata_t *e = iealloc(TSDN_NULL, p); + assert_ptr_not_null(e, "NULL edata for living pointer"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); + assert_ptr_not_null(n, "Record in edata should not be NULL"); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should not be NULL"); + assert_ptr_eq(e, n->alloc_edata, + "edata pointer in record is not correct"); + assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +} + +static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_zu_eq(n->usize, sz_s2u(NTH_REQ_SIZE(kth)), + "Recorded allocation usize is wrong"); +} + +static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should not be NULL"); + assert_ptr_not_null(n->alloc_edata, + "Recorded edata should not be NULL for living pointer"); + assert_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + "Record in edata is not 
correct"); + assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); +} + +static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should not be NULL"); + assert_ptr_null(n->alloc_edata, + "Recorded edata should be NULL for released pointer"); + assert_ptr_not_null(n->dalloc_tctx, + "dalloc_tctx in record should not be NULL for released pointer"); +} + +TEST_BEGIN(test_prof_recent_alloc) { + test_skip_if(!config_prof); + + bool b; + unsigned i, c; + size_t req_size; + void *p; + prof_recent_t *n; + ssize_t future; + + tsd_t *tsd = tsd_fetch(); + + confirm_prof_setup(tsd); + + /* + * First batch of 2 * OPT_ALLOC_MAX allocations. After the + * (OPT_ALLOC_MAX - 1)'th allocation the recorded allocations should + * always be the last OPT_ALLOC_MAX allocations coming from here. + */ + for (i = 0; i < 2 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + confirm_malloc(tsd, p); + if (i < OPT_ALLOC_MAX - 1) { + malloc_mutex_lock(tsd_tsdn(tsd), + &prof_recent_alloc_mtx); + assert_ptr_ne(prof_recent_alloc_begin(tsd), + prof_recent_alloc_end(tsd), + "Empty recent allocation"); + malloc_mutex_unlock(tsd_tsdn(tsd), + &prof_recent_alloc_mtx); + free(p); + /* + * The recorded allocations may still include some + * other allocations before the test run started, + * so keep allocating without checking anything. + */ + continue; + } + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); + if (c == OPT_ALLOC_MAX) { + confirm_record_living(tsd, n); + } else { + confirm_record_released(tsd, n); + } + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + confirm_prof_setup(tsd); + + b = false; + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + "mallctl for turning off prof_active failed"); + + /* + * Second batch of OPT_ALLOC_MAX allocations. Since prof_active is + * turned off, this batch shouldn't be recorded. + */ + for (; i < 3 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + assert_ptr_not_null(p, "malloc failed unexpectedly"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + b = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + "mallctl for turning on prof_active failed"); + + confirm_prof_setup(tsd); + + /* + * Third batch of OPT_ALLOC_MAX allocations. Since prof_active is + * turned back on, they should be recorded, and in the list of recorded + * allocations they should follow the first batch rather than the + * second batch. 
+ */ + for (; i < 4 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + confirm_malloc(tsd, p); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, + /* Is the allocation from the third batch? */ + i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? + /* If yes, then it's just recorded. */ + i + c - OPT_ALLOC_MAX : + /* + * Otherwise, it should come from the first batch + * instead of the second batch. + */ + i + c - 2 * OPT_ALLOC_MAX); + if (c == OPT_ALLOC_MAX) { + confirm_record_living(tsd, n); + } else { + confirm_record_released(tsd, n); + } + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + /* Increasing the limit shouldn't alter the list of records. */ + future = OPT_ALLOC_MAX + 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + + /* + * Decreasing the limit shouldn't alter the list of records as long as + * the new limit is still no less than the length of the list. + */ + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + + /* + * Decreasing the limit should shorten the list of records if the new + * limit is less than the length of the list. + */ + future = OPT_ALLOC_MAX - 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX - 1, + "Incorrect total number of allocations"); + + /* Setting to unlimited shouldn't alter the list of records. 
*/ + future = -1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX - 1, + "Incorrect total number of allocations"); + + /* Downshift to only one record. */ + future = 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + n = prof_recent_alloc_begin(tsd); + assert(n != prof_recent_alloc_end(tsd)); + confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); + confirm_record_released(tsd, n); + n = prof_recent_alloc_next(tsd, n); + assert(n == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* Completely turn off. */ + future = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* Restore the settings. */ + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + confirm_prof_setup(tsd); +} +TEST_END + +#undef NTH_REQ_SIZE + +int +main(void) { + return test( + test_confirm_setup, + test_prof_recent_off, + test_prof_recent_on, + test_prof_recent_alloc); +} diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh new file mode 100644 index 0000000..59759a6 --- /dev/null +++ b/test/unit/prof_recent.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_recent_alloc_max:3" +fi diff --git a/test/unit/prof_reset.sh b/test/unit/prof_reset.sh index 43c516a..daefeb7 100644 --- a/test/unit/prof_reset.sh +++ b/test/unit/prof_reset.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0,prof_recent_alloc_max:0" fi -- cgit v0.12 From 6b6b4709b34992940e112fbe5726472b37783ef2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 09:59:17 -0800 Subject: Unify buffered writer naming --- include/jemalloc/internal/malloc_io.h | 6 +++--- src/jemalloc.c | 6 +++--- src/malloc_io.c | 8 ++++---- src/prof_log.c | 8 ++++---- src/prof_recent.c | 6 +++--- test/unit/buf_writer.c | 8 ++++---- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index f5d16a5..dabcb19 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -119,9 +119,9 @@ typedef struct { char *buf; size_t buf_size; /* must be one less than the capacity of buf array */ size_t buf_end; -} buf_writer_arg_t; +} buf_write_arg_t; -void 
buf_writer_flush(buf_writer_arg_t *arg); -void buffered_write_cb(void *buf_writer_arg, const char *s); +void buf_write_flush(buf_write_arg_t *arg); +void buf_write_cb(void *buf_write_arg, const char *s); #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 7184cbb..0a95b3b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3696,10 +3696,10 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, char *stats_print_buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_writer_arg_t stats_print_buf_arg = {write_cb, cbopaque, + buf_write_arg_t stats_print_buf_arg = {write_cb, cbopaque, stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; - stats_print(buffered_write_cb, &stats_print_buf_arg, opts); - buf_writer_flush(&stats_print_buf_arg); + stats_print(buf_write_cb, &stats_print_buf_arg, opts); + buf_write_flush(&stats_print_buf_arg); idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); } diff --git a/src/malloc_io.c b/src/malloc_io.c index fc7ff72..cfefcac 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -665,7 +665,7 @@ malloc_printf(const char *format, ...) { } void -buf_writer_flush(buf_writer_arg_t *arg) { +buf_write_flush(buf_write_arg_t *arg) { assert(arg->buf_end <= arg->buf_size); arg->buf[arg->buf_end] = '\0'; if (arg->write_cb == NULL) { @@ -677,13 +677,13 @@ buf_writer_flush(buf_writer_arg_t *arg) { } void -buffered_write_cb(void *buf_writer_arg, const char *s) { - buf_writer_arg_t *arg = (buf_writer_arg_t *)buf_writer_arg; +buf_write_cb(void *buf_write_arg, const char *s) { + buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; size_t i, slen, n, s_remain, buf_remain; assert(arg->buf_end <= arg->buf_size); for (i = 0, slen = strlen(s); i < slen; i += n) { if (arg->buf_end == arg->buf_size) { - buf_writer_flush(arg); + buf_write_flush(arg); } s_remain = slen - i; buf_remain = arg->buf_size - arg->buf_end; diff --git a/src/prof_log.c b/src/prof_log.c index 11de436..e3d21af 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -631,12 +631,12 @@ prof_log_stop(tsdn_t *tsdn) { char *prof_log_stop_buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_writer_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, + buf_write_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; /* Emit to json. */ - emitter_init(&emitter, emitter_output_json_compact, - buffered_write_cb, &prof_log_stop_buf_arg); + emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, + &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -645,7 +645,7 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_writer_flush(&prof_log_stop_buf_arg); + buf_write_flush(&prof_log_stop_buf_arg); idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); /* Reset global state. 
*/ diff --git a/src/prof_recent.c b/src/prof_recent.c index 98349ac..f4cad09 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -461,10 +461,10 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf, + buf_write_arg_t buf_arg = {write_cb, cbopaque, buf, PROF_RECENT_PRINT_BUFSIZE - 1, 0}; emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buffered_write_cb, + emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, &buf_arg); emitter_begin(&emitter); @@ -524,7 +524,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - buf_writer_flush(&buf_arg); + buf_write_flush(&buf_arg); idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); } #undef PROF_RECENT_PRINT_BUFSIZE diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 4d8ae99..5051f76 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -20,8 +20,8 @@ TEST_BEGIN(test_buf_write) { size_t n_unit, remain, i; ssize_t unit; uint64_t arg = 4; /* Starting value of random argument. */ - buf_writer_arg_t test_buf_arg = - {test_write_cb, &arg, test_buf, TEST_BUF_SIZE - 1, 0}; + buf_write_arg_t test_buf_arg = {test_write_cb, &arg, test_buf, + TEST_BUF_SIZE - 1, 0}; memset(s, 'a', UNIT_MAX); arg_store = arg; @@ -33,7 +33,7 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buffered_write_cb(&test_buf_arg, s); + buf_write_cb(&test_buf_arg, s); remain += unit; if (remain > test_buf_arg.buf_size) { /* Flushes should have happened. 
*/ @@ -49,7 +49,7 @@ TEST_BEGIN(test_buf_write) { "Incorrect length after writing %zu strings" " of length %zu", i, unit); } - buf_writer_flush(&test_buf_arg); + buf_write_flush(&test_buf_arg); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); -- cgit v0.12 From 6d8e6169028f50ef9904692a0d4ecc0f21054925 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 16:36:09 -0800 Subject: Make buffered writer an independent module --- Makefile.in | 1 + include/jemalloc/internal/buf_writer.h | 24 +++++++++++++++ include/jemalloc/internal/malloc_io.h | 26 +--------------- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/buf_writer.c | 36 ++++++++++++++++++++++ src/jemalloc.c | 1 + src/malloc_io.c | 33 +------------------- src/prof_log.c | 1 + src/prof_recent.c | 1 + test/unit/buf_writer.c | 2 ++ 13 files changed, 76 insertions(+), 57 deletions(-) create mode 100644 include/jemalloc/internal/buf_writer.h create mode 100644 src/buf_writer.c diff --git a/Makefile.in b/Makefile.in index ad54720..d923d50 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,6 +101,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ + $(srcroot)src/buf_writer.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h new file mode 100644 index 0000000..013bbf5 --- /dev/null +++ b/include/jemalloc/internal/buf_writer.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_BUF_WRITER_H +#define JEMALLOC_INTERNAL_BUF_WRITER_H + +/* + * Note: when using the buffered writer, cbopaque is passed to write_cb only + * when the buffer is flushed. It would make a difference if cbopaque points + * to something that's changing for each write_cb call, or something that + * affects write_cb in a way dependent on the content of the output string. + * However, the most typical usage case in practice is that cbopaque points to + * some "option like" content for the write_cb, so it doesn't matter. + */ + +typedef struct { + void (*write_cb)(void *, const char *); + void *cbopaque; + char *buf; + size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_end; +} buf_write_arg_t; + +void buf_write_flush(buf_write_arg_t *arg); +void buf_write_cb(void *buf_write_arg, const char *s); + +#endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index dabcb19..fac6361 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -40,6 +40,7 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 +void wrtmessage(void *cbopaque, const char *s); int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base); @@ -99,29 +100,4 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } -/******************************************************************************/ - -/* - * The rest is buffered writing utility. - * - * The only difference when using the buffered writer is that cbopaque is - * passed to write_cb only when the buffer is flushed. 
It would make a - * difference if cbopaque points to something that's changing for each write_cb - * call, or something that affects write_cb in a way dependent on the content - * of the output string. However, the most typical usage case in practice is - * that cbopaque points to some "option like" content for the write_cb, so it - * doesn't matter. - */ - -typedef struct { - void (*write_cb)(void *, const char *); - void *cbopaque; - char *buf; - size_t buf_size; /* must be one less than the capacity of buf array */ - size_t buf_end; -} buf_write_arg_t; - -void buf_write_flush(buf_write_arg_t *arg); -void buf_write_cb(void *buf_write_arg, const char *s); - #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f9af3dd..4b25b85 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -41,6 +41,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 90f8831..73ee8d1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -22,6 +22,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 4ca484a..ed6f618 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -41,6 +41,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 90f8831..73ee8d1 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -22,6 +22,9 @@ Source Files + + Source Files + Source Files diff --git a/src/buf_writer.c b/src/buf_writer.c new file mode 100644 index 0000000..4106594 --- /dev/null +++ b/src/buf_writer.c @@ -0,0 +1,36 @@ +#define JEMALLOC_BUF_WRITER_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/malloc_io.h" + +void +buf_write_flush(buf_write_arg_t *arg) { + assert(arg->buf_end <= arg->buf_size); + arg->buf[arg->buf_end] = '\0'; + if (arg->write_cb == NULL) { + arg->write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + arg->write_cb(arg->cbopaque, arg->buf); + arg->buf_end = 0; +} + +void +buf_write_cb(void *buf_write_arg, const char *s) { + buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; + size_t i, slen, n, s_remain, buf_remain; + assert(arg->buf_end <= arg->buf_size); + for (i = 0, slen = strlen(s); i < slen; i += n) { + if (arg->buf_end == arg->buf_size) { + buf_write_flush(arg); + } + s_remain = slen - i; + buf_remain = arg->buf_size - arg->buf_end; + n = s_remain < buf_remain ? 
s_remain : buf_remain; + memcpy(arg->buf + arg->buf_end, s + i, n); + arg->buf_end += n; + } + assert(i == slen); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 0a95b3b..e54c49b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" diff --git a/src/malloc_io.c b/src/malloc_io.c index cfefcac..4b7d2e4 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -53,7 +53,6 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void wrtmessage(void *cbopaque, const char *s); #define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); @@ -68,7 +67,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. */ -static void +void wrtmessage(void *cbopaque, const char *s) { malloc_write_fd(STDERR_FILENO, s, strlen(s)); } @@ -664,36 +663,6 @@ malloc_printf(const char *format, ...) { va_end(ap); } -void -buf_write_flush(buf_write_arg_t *arg) { - assert(arg->buf_end <= arg->buf_size); - arg->buf[arg->buf_end] = '\0'; - if (arg->write_cb == NULL) { - arg->write_cb = je_malloc_message != NULL ? - je_malloc_message : wrtmessage; - } - arg->write_cb(arg->cbopaque, arg->buf); - arg->buf_end = 0; -} - -void -buf_write_cb(void *buf_write_arg, const char *s) { - buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; - size_t i, slen, n, s_remain, buf_remain; - assert(arg->buf_end <= arg->buf_size); - for (i = 0, slen = strlen(s); i < slen; i += n) { - if (arg->buf_end == arg->buf_size) { - buf_write_flush(arg); - } - s_remain = slen - i; - buf_remain = arg->buf_size - arg->buf_end; - n = s_remain < buf_remain ? s_remain : buf_remain; - memcpy(arg->buf + arg->buf_end, s + i, n); - arg->buf_end += n; - } - assert(i == slen); -} - /* * Restore normal assertion macros, in order to make it possible to compile all * C files as a single concatenation. 
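[Editor's note: the following standalone C fragment is an illustrative sketch, not part of any patch in this series. It shows one plausible way to drive the buffered-writer module whose declarations (buf_write_arg_t, buf_write_cb, buf_write_flush) are introduced in include/jemalloc/internal/buf_writer.h above; print_cb and demo are hypothetical names invented for this example, and the snippet assumes the jemalloc internal headers are set up as in src/buf_writer.c. Small writes accumulate in the caller-provided buffer and reach the callback only when the buffer fills or is explicitly flushed, which is why cbopaque is best used for "option-like" state rather than per-write state.]

	#include <stdio.h>

	/* Hypothetical sink: forward each flushed chunk to the FILE * passed as cbopaque. */
	static void
	print_cb(void *cbopaque, const char *s) {
		fputs(s, (FILE *)cbopaque);
	}

	static void
	demo(void) {
		static char buf[4096];
		/* buf_size is one less than the array capacity, reserving room for '\0'. */
		buf_write_arg_t arg = {print_cb, stderr, buf, sizeof(buf) - 1, 0};
		buf_write_cb(&arg, "many small writes, ");
		buf_write_cb(&arg, "one flush\n");
		buf_write_flush(&arg);	/* hands the accumulated text to print_cb */
	}

[The struct-literal initialization mirrors the callers shown in this commit; a later commit in this series replaces it with buf_write_init().]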
diff --git a/src/prof_log.c b/src/prof_log.c index e3d21af..d0b798d 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index f4cad09..a1f71ea 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 5051f76..bbdb657 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/buf_writer.h" + #define TEST_BUF_SIZE 16 #define UNIT_MAX (TEST_BUF_SIZE * 3) -- cgit v0.12 From 40a391408c6edbabac4e408c1cdfdda64c0cd356 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 16:50:09 -0800 Subject: Define constructor for buffered writer argument --- include/jemalloc/internal/buf_writer.h | 12 +++++++++++- src/jemalloc.c | 17 +++++++++-------- src/prof_log.c | 17 +++++++++-------- src/prof_recent.c | 5 +++-- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 013bbf5..1acda9a 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -14,10 +14,20 @@ typedef struct { void (*write_cb)(void *, const char *); void *cbopaque; char *buf; - size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_size; size_t buf_end; } buf_write_arg_t; +JEMALLOC_ALWAYS_INLINE void +buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), + void *cbopaque, char *buf, size_t buf_len) { + arg->write_cb = write_cb; + arg->cbopaque = cbopaque; + arg->buf = buf; + arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. 
*/ + arg->buf_end = 0; +} + void buf_write_flush(buf_write_arg_t *arg); void buf_write_cb(void *buf_write_arg, const char *s); diff --git a/src/jemalloc.c b/src/jemalloc.c index e54c49b..5503fd0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3694,14 +3694,15 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_debug) { stats_print(write_cb, cbopaque, opts); } else { - char *stats_print_buf = (char *)iallocztm(tsdn, - STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t stats_print_buf_arg = {write_cb, cbopaque, - stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; - stats_print(buf_write_cb, &stats_print_buf_arg, opts); - buf_write_flush(&stats_print_buf_arg); - idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); + char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, + sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + STATS_PRINT_BUFSIZE); + stats_print(buf_write_cb, &buf_arg, opts); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index d0b798d..9495cf7 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -629,15 +629,16 @@ prof_log_stop(tsdn_t *tsdn) { struct prof_emitter_cb_arg_s arg; arg.fd = fd; - char *prof_log_stop_buf = (char *)iallocztm(tsdn, - PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, - prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; + char *buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, + sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + PROF_LOG_STOP_BUFSIZE); /* Emit to json. */ emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &prof_log_stop_buf_arg); + &buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -646,8 +647,8 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_write_flush(&prof_log_stop_buf_arg); - idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); /* Reset global state. 
*/ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index a1f71ea..ed4170e 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -462,8 +462,9 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_write_arg_t buf_arg = {write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE - 1, 0}; + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, &buf_arg); -- cgit v0.12 From 2b604a3016f2cbda9499e2533ebef43b6fa9b72e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 10:20:34 -0800 Subject: Record request size in prof recent entries --- include/jemalloc/internal/prof_externs.h | 4 ++-- include/jemalloc/internal/prof_inlines_b.h | 12 ++++++------ include/jemalloc/internal/prof_recent.h | 2 +- include/jemalloc/internal/prof_structs.h | 2 +- src/jemalloc.c | 8 ++++---- src/prof.c | 6 +++--- src/prof_recent.c | 9 +++++---- test/unit/prof_recent.c | 4 ++-- 8 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a07fd22..df4f7cd 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -54,8 +54,8 @@ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, + size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 9ea0454..d0cc48d 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -126,22 +126,22 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, size, usize, tctx); } else { prof_tctx_reset(tsd, ptr, alloc_ctx); } } JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, const void *old_ptr, size_t old_usize, +prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, + prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; @@ -168,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, moved = (ptr != old_ptr); if (unlikely(sampled)) { - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, 
size, usize, tctx); } else if (moved) { prof_tctx_reset(tsd, ptr, NULL); } else if (unlikely(old_sampled)) { diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d0e9e1e..b2973db 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); -void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 59c0f4f..ee78643 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -213,7 +213,7 @@ struct prof_recent_s { nstime_t dalloc_time; prof_recent_t *next; - size_t usize; + size_t size; prof_tctx_t *alloc_tctx; edata_t *alloc_edata; /* NULL means allocation has been freed. */ prof_tctx_t *dalloc_tctx; diff --git a/src/jemalloc.c b/src/jemalloc.c index 5503fd0..e33d032 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2175,7 +2175,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } - prof_malloc(tsd, allocation, usize, &alloc_ctx, tctx); + prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, @@ -3045,8 +3045,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(tsd_tsdn(tsd), p); } - prof_realloc(tsd, p, *usize, tctx, prof_active, old_ptr, old_usize, - &old_prof_info); + prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, + old_usize, &old_prof_info); return p; } @@ -3338,7 +3338,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); - prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, + prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info); } diff --git a/src/prof.c b/src/prof.c index 159600e..791c362 100644 --- a/src/prof.c +++ b/src/prof.c @@ -145,8 +145,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, + size_t usize, prof_tctx_t *tctx) { edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); prof_info_set(tsd, edata, tctx); @@ -162,7 +162,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); if (record_recent) { assert(tctx == edata_prof_tctx_get(edata)); - prof_recent_alloc(tsd, edata, usize); + prof_recent_alloc(tsd, edata, size); } } diff --git a/src/prof_recent.c b/src/prof_recent.c index ed4170e..0ae449f 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -249,7 +249,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { } void -prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) { +prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -312,7 +312,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t 
*edata, size_t usize) { { /* Fill content into the dummy node. */ prof_recent_t *node = prof_recent_alloc_dummy; - node->usize = usize; + node->size = size; nstime_copy(&node->alloc_time, edata_prof_alloc_time_get(edata)); node->alloc_tctx = tctx; @@ -487,8 +487,9 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), n = prof_recent_alloc_next(tsd, n)) { emitter_json_object_begin(&emitter); - emitter_json_kv(&emitter, "usize", emitter_type_size, - &n->usize); + emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); + size_t usize = sz_s2u(n->size); + emitter_json_kv(&emitter, "usize", emitter_type_size, &usize); bool released = n->alloc_edata == NULL; emitter_json_kv(&emitter, "released", emitter_type_bool, &released); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e10ac3f..0f140a8 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -116,8 +116,8 @@ static void confirm_malloc(tsd_t *tsd, void *p) { static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_zu_eq(n->usize, sz_s2u(NTH_REQ_SIZE(kth)), - "Recorded allocation usize is wrong"); + assert_zu_eq(n->size, NTH_REQ_SIZE(kth), + "Recorded allocation size is wrong"); } static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { -- cgit v0.12 From a5d3dd4059a19268e6c2916b4014e395442d5750 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 Jan 2020 11:19:14 -0800 Subject: Fix an assertion on extent head state with dss. --- src/ehooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ehooks.c b/src/ehooks.c index 667bee8..1e1cac9 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -221,7 +221,8 @@ ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); return true; } - assert(ehooks_same_sn(tsdn, addr_a, addr_b)); + assert(ehooks_same_sn(tsdn, addr_a, addr_b) || (have_dss && + (extent_in_dss(addr_a) || extent_in_dss(addr_b)))); return false; } -- cgit v0.12 From ad3f3fc561d5829a0a998c1b0650f6e7c7474a74 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 11:12:18 -0800 Subject: Fetch time after tctx and only for samples --- src/large.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/large.c b/src/large.c index ca35fc5..9b94617 100644 --- a/src/large.c +++ b/src/large.c @@ -372,18 +372,20 @@ void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { assert(prof_info != NULL); - nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; - if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) { - /* - * This allocation was a prof sample. Reset the pointer on the - * recent allocation record, so that this allocation is - * recorded as released. - */ - prof_recent_alloc_reset(tsd, edata); + if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { + nstime_copy(&prof_info->alloc_time, + edata_prof_alloc_time_get(edata)); + if (reset_recent) { + /* + * Reset the pointer on the recent allocation record, + * so that this allocation is recorded as released. + */ + prof_recent_alloc_reset(tsd, edata); + } } } -- cgit v0.12 From dab81bd315e3eee19552ab68d331f693b205866a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 23:28:09 -0800 Subject: Rework and fix the assertions on malloc fastpath. 
The first half of the malloc fastpath may execute before malloc_init. Make the assertions work in that case. --- include/jemalloc/internal/sz.h | 24 ++++++++++++++++++++---- src/jemalloc.c | 37 ++++++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6df541f..73fb0a4 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -152,10 +152,15 @@ sz_size2index_compute(size_t size) { } JEMALLOC_ALWAYS_INLINE szind_t -sz_size2index_lookup(size_t size) { +sz_size2index_lookup_impl(size_t size) { assert(size <= SC_LOOKUP_MAXCLASS); - szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) - >> SC_LG_TINY_MIN]); + return sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN]; +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_lookup(size_t size) { + szind_t ret = sz_size2index_lookup_impl(size); assert(ret == sz_size2index_compute(size)); return ret; } @@ -195,8 +200,13 @@ sz_index2size_compute(szind_t index) { } JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_lookup_impl(szind_t index) { + return sz_index2size_tab[index]; +} + +JEMALLOC_ALWAYS_INLINE size_t sz_index2size_lookup(szind_t index) { - size_t ret = (size_t)sz_index2size_tab[index]; + size_t ret = sz_index2size_lookup_impl(index); assert(ret == sz_index2size_compute(index)); return ret; } @@ -207,6 +217,12 @@ sz_index2size(szind_t index) { return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE void +sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { + *ind = sz_size2index_lookup_impl(size); + *usize = sz_index2size_lookup_impl(*ind); +} + JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index e33d032..6455061 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2361,29 +2361,48 @@ je_malloc(size_t size) { if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { return malloc_default(size); } - - szind_t ind = sz_size2index_lookup(size); + /* + * The code below till the branch checking the next_event threshold may + * execute before malloc_init(), in which case the threshold is 0 to + * trigger slow path and initialization. + * + * Note that when uninitialized, only the fast-path variants of the sz / + * tsd facilities may be called. + */ + szind_t ind; /* * The thread_allocated counter in tsd serves as a general purpose * accumulator for bytes of allocation to trigger different types of * events. usize is always needed to advance thread_allocated, though * it's not always needed in the core allocation logic. */ - size_t usize = sz_index2size(ind); - /* - * Fast path relies on size being a bin. - * I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS - */ + size_t usize; + + sz_size2index_usize_fastpath(size, &ind, &usize); + /* Fast path relies on size being a bin. */ assert(ind < SC_NBINS); - assert(size <= SC_SMALL_MAXCLASS); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && + (size <= SC_SMALL_MAXCLASS)); uint64_t allocated = thread_allocated_malloc_fastpath(tsd); uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + uint64_t allocated_after = allocated + usize; + /* + * The ind and usize might be uninitialized (or partially) before + * malloc_init(). The assertions check for: 1) full correctness (usize + * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) + * when !initialized. 
+ */ + if (!malloc_initialized()) { + assert(threshold == 0); + } else { + assert(ind == sz_size2index(size)); + assert(usize > 0 && usize == sz_index2size(ind)); + } /* * Check for events and tsd non-nominal (fast_threshold will be set to * 0) in a single branch. */ - uint64_t allocated_after = allocated + usize; if (unlikely(allocated_after >= threshold)) { return malloc_default(size); } -- cgit v0.12 From b8df719d5c10f6b52263ca4e7bb800c2796b6767 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 11:23:32 -0800 Subject: No tdata creation for backtracing on dying thread --- src/prof_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_data.c b/src/prof_data.c index dfc507f..723e579 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -406,7 +406,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { prof_tctx_t * prof_tctx_create(tsd_t *tsd) { - if (tsd_reentrancy_level_get(tsd) > 0) { + if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { return NULL; } -- cgit v0.12 From bd3be8e0b169e8a3952cbed1a399cfffe9023862 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 16 Jan 2020 13:28:27 -0800 Subject: Remove commit parameter to ecache functions. No caller ever wants uncommitted memory. --- include/jemalloc/internal/extent.h | 4 ++-- src/arena.c | 19 +++++++------------ src/extent.c | 12 +++++++----- src/large.c | 7 +++---- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8fecee6..b89708a 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -23,10 +23,10 @@ extern rtree_t extents_rtree; edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + bool slab, szind_t szind, bool *zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + bool slab, szind_t szind, bool *zero); void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index 7e1a673..d04712a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -433,19 +433,17 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; - bool commit = true; edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + NULL, usize, sz_large_pad, alignment, false, szind, zero); if (edata == NULL && arena_may_have_muzzy(arena)) { edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, usize, sz_large_pad, alignment, false, szind, zero, - &commit); + NULL, usize, sz_large_pad, alignment, false, szind, zero); } size_t size = usize + sz_large_pad; if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, usize, sz_large_pad, - alignment, false, szind, zero, &commit); + alignment, false, szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -1203,15 +1201,14 @@ static edata_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const bin_info_t *bin_info, szind_t szind) { 
edata_t *slab; - bool zero, commit; + bool zero; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); zero = false; - commit = true; slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); + NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1230,13 +1227,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - bool commit = true; edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, - &commit); + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, diff --git a/src/extent.c b/src/extent.c index 54f1499..9779c38 100644 --- a/src/extent.c +++ b/src/extent.c @@ -186,14 +186,15 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t * ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + szind_t szind, bool *zero) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool commit = true; edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + size, pad, alignment, slab, szind, zero, &commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } @@ -201,14 +202,15 @@ ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit) { + bool slab, szind_t szind, bool *zero) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); + size, pad, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -220,7 +222,7 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } edata = extent_alloc_wrapper(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); + new_addr, size, pad, alignment, slab, szind, zero, &commit); } assert(edata == NULL || edata_dumpable_get(edata)); diff --git a/src/large.c b/src/large.c index 9b94617..e133e19 100644 --- a/src/large.c +++ b/src/large.c @@ -147,22 +147,21 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * false. 
*/ bool is_zeroed_trail = zero; - bool commit = true; edata_t *trail; bool new_mapping; if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail, &commit)) != NULL + &is_zeroed_trail)) != NULL || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail, &commit)) != NULL) { + &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, - 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) + 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } -- cgit v0.12 From 7b67ed0b5a90d5288c66c132f210883dece99181 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 16:10:23 -0800 Subject: Get rid of lock overlap in prof_recent_alloc_reset --- src/prof_recent.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 0ae449f..a53f82c 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -182,12 +182,15 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { if (dalloc_tctx != NULL) { nstime_update(&recent->dalloc_time); recent->dalloc_tctx = dalloc_tctx; + dalloc_tctx = NULL; } - } else if (dalloc_tctx != NULL) { + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + if (dalloc_tctx != NULL) { /* We lost the rase - the allocation record was just gone. */ decrement_recent_count(tsd, dalloc_tctx); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void -- cgit v0.12 From a72ea0db60bc475415c13f1057408389bccb40a4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Jan 2020 07:25:59 -0800 Subject: Restructure and correct sleep utility for testing --- Makefile.in | 2 +- test/include/test/jemalloc_test.h.in | 1 + test/include/test/mq.h | 4 ++-- test/include/test/sleep.h | 1 + test/src/mq.c | 27 --------------------------- test/src/sleep.c | 27 +++++++++++++++++++++++++++ 6 files changed, 32 insertions(+), 30 deletions(-) create mode 100644 test/include/test/sleep.h delete mode 100644 test/src/mq.c create mode 100644 test/src/sleep.c diff --git a/Makefile.in b/Makefile.in index d923d50..24ab542 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,7 +167,7 @@ DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ - $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/sleep.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c ifeq (1, $(link_whole_archive)) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index c46af5d..a59408f 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -124,6 +124,7 @@ static const bool config_debug = #include "test/math.h" #include "test/mtx.h" #include "test/mq.h" +#include "test/sleep.h" #include "test/test.h" #include "test/timer.h" #include "test/thd.h" diff --git a/test/include/test/mq.h b/test/include/test/mq.h index af2c078..5dc6486 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -1,4 +1,4 @@ -void mq_nanosleep(unsigned ns); +#include 
"test/sleep.h" /* * Simple templated message queue implementation that relies on only mutexes for @@ -82,7 +82,7 @@ a_prefix##get(a_mq_type *mq) { \ \ ns = 1; \ while (true) { \ - mq_nanosleep(ns); \ + sleep_ns(ns); \ msg = a_prefix##tryget(mq); \ if (msg != NULL) { \ return msg; \ diff --git a/test/include/test/sleep.h b/test/include/test/sleep.h new file mode 100644 index 0000000..c232f63 --- /dev/null +++ b/test/include/test/sleep.h @@ -0,0 +1 @@ +void sleep_ns(unsigned ns); diff --git a/test/src/mq.c b/test/src/mq.c deleted file mode 100644 index 9b5f672..0000000 --- a/test/src/mq.c +++ /dev/null @@ -1,27 +0,0 @@ -#include "test/jemalloc_test.h" - -/* - * Sleep for approximately ns nanoseconds. No lower *nor* upper bound on sleep - * time is guaranteed. - */ -void -mq_nanosleep(unsigned ns) { - assert(ns <= 1000*1000*1000); - -#ifdef _WIN32 - Sleep(ns / 1000); -#else - { - struct timespec timeout; - - if (ns < 1000*1000*1000) { - timeout.tv_sec = 0; - timeout.tv_nsec = ns; - } else { - timeout.tv_sec = 1; - timeout.tv_nsec = 0; - } - nanosleep(&timeout, NULL); - } -#endif -} diff --git a/test/src/sleep.c b/test/src/sleep.c new file mode 100644 index 0000000..2234b4b --- /dev/null +++ b/test/src/sleep.c @@ -0,0 +1,27 @@ +#include "test/jemalloc_test.h" + +/* + * Sleep for approximately ns nanoseconds. No lower *nor* upper bound on sleep + * time is guaranteed. + */ +void +sleep_ns(unsigned ns) { + assert(ns <= 1000*1000*1000); + +#ifdef _WIN32 + Sleep(ns / 1000 / 1000); +#else + { + struct timespec timeout; + + if (ns < 1000*1000*1000) { + timeout.tv_sec = 0; + timeout.tv_nsec = ns; + } else { + timeout.tv_sec = 1; + timeout.tv_nsec = 0; + } + nanosleep(&timeout, NULL); + } +#endif +} -- cgit v0.12 From d3312085603ab84e13e820be19f55f05e75a46ea Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Jan 2020 20:43:45 -0800 Subject: Get rid of redundant logic in prof --- src/prof_data.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 723e579..2a25ec7 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -218,8 +218,8 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { } static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, + prof_gctx_t *gctx) { cassert(config_prof); /* @@ -371,7 +371,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } return NULL; } @@ -389,7 +389,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); return NULL; @@ -767,7 +767,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } else { malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } @@ -1367,8 +1367,7 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { malloc_mutex_lock(tsdn, tdata->lock); if (!tdata->expired) { tdata->expired = true; - destroy_tdata = tdata->attached ? 
false : - prof_tdata_should_destroy(tsdn, tdata, false); + destroy_tdata = prof_tdata_should_destroy(tsdn, tdata, false); } else { destroy_tdata = false; } @@ -1492,8 +1491,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx); } malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); -- cgit v0.12 From 84b28c6a13d4d208e547bc50f7091107f5161957 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Jan 2020 20:38:46 -0800 Subject: Properly handle tdata deletion race --- src/prof_data.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 2a25ec7..9721cbe 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1315,6 +1315,7 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); tdata_tree_remove(&tdatas, tdata); @@ -1432,10 +1433,6 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs == 0); @@ -1444,9 +1441,21 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + prof_gctx_t *gctx = tctx->gctx; + + { + prof_tdata_t *tdata = tctx->tdata; + tctx->tdata = NULL; + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), + tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + } + + bool destroy_tctx, destroy_gctx; malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); switch (tctx->state) { @@ -1493,13 +1502,6 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx); } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - if (destroy_tctx) { idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); } -- cgit v0.12 From cd6e908241900640864b59a4dae835e9cecfc0cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 16:01:45 -0800 Subject: Add stress test for last-N profiling mode --- test/unit/prof_recent.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 0f140a8..c132452 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/prof_recent.h" /* As specified in the shell script */ -#define OPT_ALLOC_MAX 3 +#define OPT_ALLOC_MAX 3 /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup(tsd_t *tsd) { @@ -381,11 +381,124 @@ TEST_END #undef NTH_REQ_SIZE +#define N_THREADS 16 +#define N_PTRS 512 
+#define N_CTLS 8 +#define N_ITERS 2048 +#define STRESS_ALLOC_MAX 4096 + +typedef struct { + thd_t thd; + size_t id; + void *ptrs[N_PTRS]; + size_t count; +} thd_data_t; + +static thd_data_t thd_data[N_THREADS]; +static ssize_t test_max; + +static void test_write_cb(void *cbopaque, const char *str) { + sleep_ns(1000 * 1000); +} + +static void *f_thread(void *arg) { + const size_t thd_id = *(size_t *)arg; + thd_data_t *data_p = thd_data + thd_id; + assert(data_p->id == thd_id); + data_p->count = 0; + uint64_t rand = (uint64_t)thd_id; + tsd_t *tsd = tsd_fetch(); + assert(test_max > 1); + ssize_t last_max = -1; + for (int i = 0; i < N_ITERS; i++) { + rand = prng_range_u64(&rand, N_PTRS + N_CTLS * 5); + assert(data_p->count <= N_PTRS); + if (rand < data_p->count) { + assert(data_p->count > 0); + if (rand != data_p->count - 1) { + assert(data_p->count > 1); + void *temp = data_p->ptrs[rand]; + data_p->ptrs[rand] = + data_p->ptrs[data_p->count - 1]; + data_p->ptrs[data_p->count - 1] = temp; + } + free(data_p->ptrs[--data_p->count]); + } else if (rand < N_PTRS) { + assert(data_p->count < N_PTRS); + data_p->ptrs[data_p->count++] = malloc(1); + } else if (rand % 5 == 0) { + prof_recent_alloc_dump(tsd, test_write_cb, NULL); + } else if (rand % 5 == 1) { + last_max = prof_recent_alloc_max_ctl_read(tsd); + } else if (rand % 5 == 2) { + last_max = + prof_recent_alloc_max_ctl_write(tsd, test_max * 2); + } else if (rand % 5 == 3) { + last_max = + prof_recent_alloc_max_ctl_write(tsd, test_max); + } else { + assert(rand % 5 == 4); + last_max = + prof_recent_alloc_max_ctl_write(tsd, test_max / 2); + } + assert_zd_ge(last_max, -1, "Illegal last-N max"); + } + + while (data_p->count > 0) { + free(data_p->ptrs[--data_p->count]); + } + + return NULL; +} + +TEST_BEGIN(test_prof_recent_stress) { + test_skip_if(!config_prof); + + tsd_t *tsd = tsd_fetch(); + confirm_prof_setup(tsd); + + test_max = OPT_ALLOC_MAX; + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + data_p->id = i; + thd_create(&data_p->thd, &f_thread, &data_p->id); + } + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + thd_join(data_p->thd, NULL); + } + + test_max = STRESS_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + data_p->id = i; + thd_create(&data_p->thd, &f_thread, &data_p->id); + } + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + thd_join(data_p->thd, NULL); + } + + test_max = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + confirm_prof_setup(tsd); +} +TEST_END + +#undef STRESS_ALLOC_MAX +#undef N_ITERS +#undef N_PTRS +#undef N_THREADS + int main(void) { return test( test_confirm_setup, test_prof_recent_off, test_prof_recent_on, - test_prof_recent_alloc); + test_prof_recent_alloc, + test_prof_recent_stress); } -- cgit v0.12 From f81341a48b15e9257d573b80e8e45589137397ec Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Jan 2020 14:23:10 -0800 Subject: Fallback to unbuffered printing if OOM --- include/jemalloc/internal/buf_writer.h | 2 ++ src/jemalloc.c | 16 ++++++++++------ src/prof_log.c | 21 +++++++++++++-------- src/prof_recent.c | 21 ++++++++++++++------- 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h 
b/include/jemalloc/internal/buf_writer.h index 1acda9a..60bd010 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -23,7 +23,9 @@ buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), void *cbopaque, char *buf, size_t buf_len) { arg->write_cb = write_cb; arg->cbopaque = cbopaque; + assert(buf != NULL); arg->buf = buf; + assert(buf_len >= 2); arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. */ arg->buf_end = 0; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6455061..218e04a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3716,12 +3716,16 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, - STATS_PRINT_BUFSIZE); - stats_print(buf_write_cb, &buf_arg, opts); - buf_write_flush(&buf_arg); - idalloctm(tsdn, buf, NULL, NULL, true, true); + if (buf == NULL) { + stats_print(write_cb, cbopaque, opts); + } else { + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + STATS_PRINT_BUFSIZE); + stats_print(buf_write_cb, &buf_arg, opts); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); + } } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index 9495cf7..a04c8e4 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -633,12 +633,15 @@ prof_log_stop(tsdn_t *tsdn) { sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, - PROF_LOG_STOP_BUFSIZE); - - /* Emit to json. */ - emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &buf_arg); + if (buf == NULL) { + emitter_init(&emitter, emitter_output_json_compact, + prof_emitter_write_cb, &arg); + } else { + buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + PROF_LOG_STOP_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_write_cb, &buf_arg); + } emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -647,8 +650,10 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_write_flush(&buf_arg); - idalloctm(tsdn, buf, NULL, NULL, true, true); + if (buf != NULL) { + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); + } /* Reset global state. 
*/ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index a53f82c..66a9b40 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -465,12 +465,17 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &buf_arg); + buf_write_arg_t buf_arg; + if (buf == NULL) { + emitter_init(&emitter, emitter_output_json_compact, write_cb, + cbopaque); + } else { + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_write_cb, &buf_arg); + } emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -530,8 +535,10 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - buf_write_flush(&buf_arg); - idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); + if (buf != NULL) { + buf_write_flush(&buf_arg); + idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); + } } #undef PROF_RECENT_PRINT_BUFSIZE -- cgit v0.12 From 6a622867cac04d7cdd4cf9cf19b7a367f9108fa5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jan 2020 11:13:26 -0800 Subject: Add "thread.idle" mallctl. This can encapsulate various internal cleaning logic, and can be used to free up resources before a long sleep. --- doc/jemalloc.xml.in | 22 ++++++++++++++++ src/ctl.c | 45 ++++++++++++++++++++++++++++++++- test/unit/mallctl.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 137 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 76edab8..b0a3f6c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1654,6 +1654,28 @@ malloc_conf = "xmalloc:true";]]> default. + + + thread.idle + (void) + -- + + Hints to jemalloc that the calling thread will be idle + for some nontrivial period of time (say, on the order of seconds), and + that doing some cleanup operations may be beneficial. There are no + guarantees as to what specific operations will be performed; currently + this flushes the caller's tcache and may (according to some heuristic) + purge its associated arena. + This is not intended to be a general-purpose background activity + mechanism, and threads should not wake up multiple times solely to call + it. Rather, a thread waiting for a task should do a timed wait first, + call thread.idle if + no task appears in the timeout interval, and then do an untimed wait. + For such a background activity mechanism, see + background_thread. 
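As an illustration only (not part of this patch or the manual page), the wait pattern described above might look as follows in application code, assuming an unprefixed jemalloc build; only the "thread.idle" mallctl is real, while the task-queue types and functions are hypothetical:

#include <stddef.h>
#include <jemalloc/jemalloc.h>

typedef struct task_s task_t;
typedef struct task_queue_s task_queue_t;

/* Hypothetical application-side queue API. */
task_t *task_queue_timed_pop(task_queue_t *q, unsigned timeout_ms);
task_t *task_queue_pop(task_queue_t *q);

task_t *
wait_for_task(task_queue_t *q) {
	/* Bounded wait first. */
	task_t *task = task_queue_timed_pop(q, 100);
	if (task != NULL) {
		return task;
	}
	/*
	 * No work arrived in the timeout interval; hint that this thread is
	 * going idle so jemalloc can flush its tcache and possibly purge.
	 * The call is purely advisory, so the return value is ignored.
	 */
	mallctl("thread.idle", NULL, NULL, NULL, 0);
	/* Then block until work actually arrives. */
	return task_queue_pop(q);
}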
+ + + tcache.create diff --git a/src/ctl.c b/src/ctl.c index 5a467d5..bbe962c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -68,6 +68,7 @@ CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) +CTL_PROTO(thread_idle) CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) @@ -293,7 +294,8 @@ static const ctl_named_node_t thread_node[] = { {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, {NAME("tcache"), CHILD(named, thread_tcache)}, - {NAME("prof"), CHILD(named, thread_prof)} + {NAME("prof"), CHILD(named, thread_prof)}, + {NAME("idle"), CTL(thread_idle)} }; static const ctl_named_node_t config_node[] = { @@ -1900,6 +1902,12 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } + /* + * Slightly counterintuitively, READONLY() really just requires that the + * call isn't trying to write, and WRITEONLY() just requires that it + * isn't trying to read; hence, adding both requires that the operation + * is neither a read nor a write. + */ READONLY(); WRITEONLY(); @@ -1971,6 +1979,41 @@ label_return: return ret; } +static int +thread_idle_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + + /* See the comment in thread_tcache_flush_ctl. */ + READONLY(); + WRITEONLY(); + + if (tcache_available(tsd)) { + tcache_flush(tsd); + } + /* + * This heuristic is perhaps not the most well-considered. But it + * matches the only idling policy we have experience with in the status + * quo. Over time we should investigate more principled approaches. + */ + if (opt_narenas > ncpus * 2) { + arena_t *arena = arena_choose(tsd, NULL); + if (arena != NULL) { + arena_decay(tsd_tsdn(tsd), arena, false, true); + } + /* + * The missing arena case is not actually an error; a thread + * might be idle before it associates itself to one. This is + * unusual, but not wrong. + */ + } + + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ static int diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d317b4a..da1716a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -882,6 +882,75 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END +TEST_BEGIN(test_thread_idle) { + /* + * We're cheating a little bit in this test, and inferring things about + * implementation internals (like tcache details). We have to; + * thread.idle has no guaranteed effects. We need stats to make these + * inferences. + */ + test_skip_if(!config_stats); + + int err; + size_t sz; + size_t miblen; + + bool tcache_enabled = false; + sz = sizeof(tcache_enabled); + err = mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + test_skip_if(!tcache_enabled); + + size_t tcache_max; + sz = sizeof(tcache_max); + err = mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + test_skip_if(tcache_max == 0); + + unsigned arena_ind; + sz = sizeof(arena_ind); + err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + /* We're going to do an allocation of size 1, which we know is small. 
*/ + size_t mib[5]; + miblen = sizeof(mib)/sizeof(mib[0]); + err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); + assert_d_eq(err, 0, ""); + mib[2] = arena_ind; + + /* + * This alloc and dalloc should leave something in the tcache, in a + * small size's cache bin. + */ + void *ptr = mallocx(1, 0); + dallocx(ptr, 0); + + uint64_t epoch; + err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); + assert_d_eq(err, 0, ""); + + uint64_t small_dalloc_pre_idle; + sz = sizeof(small_dalloc_pre_idle); + err = mallctlbymib(mib, miblen, &small_dalloc_pre_idle, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + err = mallctl("thread.idle", NULL, NULL, NULL, 0); + assert_d_eq(err, 0, ""); + + err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); + assert_d_eq(err, 0, ""); + + uint64_t small_dalloc_post_idle; + sz = sizeof(small_dalloc_post_idle); + err = mallctlbymib(mib, miblen, &small_dalloc_post_idle, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + assert_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, + "Purge didn't flush the tcache"); +} +TEST_END + int main(void) { return test( @@ -913,5 +982,6 @@ main(void) { test_prof_active, test_stats_arenas, test_hooks, - test_hooks_exhaustion); + test_hooks_exhaustion, + test_thread_idle); } -- cgit v0.12 From d92f0175c75b5c9d9fc2bccabd2af0e6ebce7757 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jan 2020 14:59:28 -0800 Subject: Introduce NEITHER_READ_NOR_WRITE in ctl. This is slightly clearer in meaning. A function that is both READONLY() and WRITEONLY() is in fact neither one. --- src/ctl.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index bbe962c..d357b38 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1451,6 +1451,7 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Can read or write, but not both. */ #define READ_XOR_WRITE() do { \ if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \ newlen != 0)) { \ @@ -1459,6 +1460,15 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Can neither read nor write. */ +#define NEITHER_READ_NOR_WRITE() do { \ + if (oldp != NULL || oldlenp != NULL || newp != NULL || \ + newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1902,14 +1912,7 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } - /* - * Slightly counterintuitively, READONLY() really just requires that the - * call isn't trying to write, and WRITEONLY() just requires that it - * isn't trying to read; hence, adding both requires that the operation - * is neither a read nor a write. - */ - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); tcache_flush(tsd); @@ -1985,9 +1988,7 @@ thread_idle_ctl(tsd_t *tsd, const size_t *mib, size_t newlen) { int ret; - /* See the comment in thread_tcache_flush_ctl. 
*/ - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); if (tcache_available(tsd)) { tcache_flush(tsd); @@ -2151,8 +2152,7 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned arena_ind; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(arena_ind, 1); arena_i_decay(tsd_tsdn(tsd), arena_ind, false); @@ -2167,8 +2167,7 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned arena_ind; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(arena_ind, 1); arena_i_decay(tsd_tsdn(tsd), arena_ind, true); @@ -2183,8 +2182,7 @@ arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen, arena_t **arena) { int ret; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(*arena_ind, 1); *arena = arena_get(tsd_tsdn(tsd), *arena_ind, false); -- cgit v0.12 From ea351a7b52430de88007bf16f354a132da311c5b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Jan 2020 16:05:37 -0800 Subject: Fix syntax errors in doc for thread.idle. --- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b0a3f6c..802c64a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1669,10 +1669,10 @@ malloc_conf = "xmalloc:true";]]> This is not intended to be a general-purpose background activity mechanism, and threads should not wake up multiple times solely to call it. Rather, a thread waiting for a task should do a timed wait first, - call thread.idle if - no task appears in the timeout interval, and then do an untimed wait. + call thread.idle + if no task appears in the timeout interval, and then do an untimed wait. For such a background activity mechanism, see - background_thread. + background_thread. -- cgit v0.12 From d71a145ec1bb8153c3d69be27eea5b076d59abfe Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Jan 2020 13:18:04 -0800 Subject: Change prof_accum_t to counter_accum_t for general purpose.
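As an illustration only (not part of this commit), any byte-driven trigger can now sit on top of the generalized counter. In the sketch below, only counter_accum_init() and counter_accum() are the interfaces introduced here; the surrounding names are hypothetical consumer code:

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/counter.h"

/* Hypothetical consumer of the generalized counter. */
static counter_accum_t dump_counter;

void trigger_dump(tsdn_t *tsdn);	/* hypothetical event handler */

bool
dump_counter_boot(uint64_t interval_bytes) {
	/* Returns true on error, following the usual jemalloc convention. */
	return counter_accum_init(&dump_counter, interval_bytes);
}

void
dump_counter_update(tsdn_t *tsdn, uint64_t allocated_bytes) {
	/* counter_accum() returns true each time the interval is crossed. */
	if (counter_accum(tsdn, &dump_counter, allocated_bytes)) {
		trigger_dump(tsdn);
	}
}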
--- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 3 +- include/jemalloc/internal/counter.h | 83 ++++++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_structs.h | 9 --- include/jemalloc/internal/prof_types.h | 1 - include/jemalloc/internal/witness.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 2 +- src/counter.c | 22 ++++++ src/prof.c | 76 ++------------------ 14 files changed, 125 insertions(+), 84 deletions(-) create mode 100644 include/jemalloc/internal/counter.h create mode 100644 src/counter.c diff --git a/Makefile.in b/Makefile.in index 24ab542..37941ea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,6 +103,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/buf_writer.c \ $(srcroot)src/ckh.c \ + $(srcroot)src/counter.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ $(srcroot)src/ecache.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 48d13b8..2d5c568 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/counter.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/extent_dss.h" @@ -117,7 +118,7 @@ struct arena_s { malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ - prof_accum_t prof_accum; + counter_accum_t prof_accum; /* * Extent serial number generator state. diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h new file mode 100644 index 0000000..302e350 --- /dev/null +++ b/include/jemalloc/internal/counter.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_COUNTER_H +#define JEMALLOC_INTERNAL_COUNTER_H + +#include "jemalloc/internal/mutex.h" + +typedef struct counter_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; + uint64_t accumbytes; +#else + atomic_u64_t accumbytes; +#endif + uint64_t interval; +} counter_accum_t; + +JEMALLOC_ALWAYS_INLINE bool +counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { + bool overflow; + uint64_t a0, a1; + + /* + * If the event moves fast enough (and/or if the event handling is slow + * enough), extreme overflow here (a1 >= interval * 2) can cause counter + * trigger coalescing. This is an intentional mechanism that avoids + * rate-limiting allocation. 
+ */ + uint64_t interval = counter->interval; + assert(interval > 0); +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= interval); + if (overflow) { + a1 %= interval; + } + } while (!atomic_compare_exchange_weak_u64(&counter->accumbytes, &a0, a1, + ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &counter->mtx); + a0 = counter->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= interval); + if (overflow) { + a1 %= interval; + } + counter->accumbytes = a1; + malloc_mutex_unlock(tsdn, &counter->mtx); +#endif + return overflow; +} + +JEMALLOC_ALWAYS_INLINE void +counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { + /* + * Cancel out as much of the excessive accumbytes increase as possible + * without underflowing. Interval-triggered events occur slightly more + * often than intended as a result of incomplete canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&counter->accumbytes, + ATOMIC_RELAXED); + do { + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + } while (!atomic_compare_exchange_weak_u64( + &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &counter->mtx); + a0 = counter->accumbytes; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + counter->accumbytes = a1; + malloc_mutex_unlock(tsdn, &counter->mtx); +#endif +} + +bool counter_accum_init(counter_accum_t *counter, uint64_t interval); + +#endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index df4f7cd..36571c8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -73,7 +73,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(tsdn_t *tsdn); +bool prof_accum_init(void); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index ee78643..977eb1c 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -21,15 +21,6 @@ typedef struct { } prof_unwind_data_t; #endif -struct prof_accum_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; - uint64_t accumbytes; -#else - atomic_u64_t accumbytes; -#endif -}; - struct prof_cnt_s { /* Profiling counters. 
*/ uint64_t curobjs; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 498962d..4abe5b5 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_PROF_TYPES_H typedef struct prof_bt_s prof_bt_t; -typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_info_s prof_info_t; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4ed787a..083bdcc 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -54,9 +54,9 @@ #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF +#define WITNESS_RANK_COUNTER_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 4b25b85..d8b4898 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -43,6 +43,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 73ee8d1..404adbe 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -28,6 +28,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ed6f618..b0d32d9 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -43,6 +43,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 73ee8d1..404adbe 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -28,6 +28,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index d04712a..9558bb4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1988,7 +1988,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - if (prof_accum_init(tsdn)) { + if (prof_accum_init()) { goto label_error; } } diff --git a/src/counter.c b/src/counter.c new file mode 100644 index 0000000..1b8201e --- /dev/null +++ b/src/counter.c @@ -0,0 +1,22 @@ +#define JEMALLOC_COUNTER_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/counter.h" + +bool +counter_accum_init(counter_accum_t *counter, uint64_t interval) { +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&counter->mtx, "counter_accum", + WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { + return true; + } + counter->accumbytes = 0; +#else + atomic_store_u64(&counter->accumbytes, 0, + ATOMIC_RELAXED); +#endif + counter->interval = interval; + + return false; +} diff --git a/src/prof.c b/src/prof.c index 791c362..649e9ca 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,7 @@ #include 
"jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/counter.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" @@ -49,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* Accessed via prof_idump_[accum/rollback](). */ -static prof_accum_t prof_idump_accumulated; +static counter_accum_t prof_idump_accumulated; /* * Initialized as opt_prof_active, and accessed via @@ -553,89 +554,24 @@ prof_fdump(void) { } bool -prof_accum_init(tsdn_t *tsdn) { +prof_accum_init(void) { cassert(config_prof); -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum", - WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { - return true; - } - prof_idump_accumulated.accumbytes = 0; -#else - atomic_store_u64(&prof_idump_accumulated.accumbytes, 0, - ATOMIC_RELAXED); -#endif - return false; + return counter_accum_init(&prof_idump_accumulated, prof_interval); } bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { cassert(config_prof); - bool overflow; - uint64_t a0, a1; - - /* - * If the application allocates fast enough (and/or if idump is slow - * enough), extreme overflow here (a1 >= prof_interval * 2) can cause - * idump trigger coalescing. This is an intentional mechanism that - * avoids rate-limiting allocation. - */ -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, - ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - } while (!atomic_compare_exchange_weak_u64( - &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); - a0 = prof_idump_accumulated.accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - prof_idump_accumulated.accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); -#endif - return overflow; + return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { cassert(config_prof); - /* - * Cancel out as much of the excessive accumbytes increase as possible - * without underflowing. Interval-triggered dumps occur slightly more - * often than intended as a result of incomplete canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, - ATOMIC_RELAXED); - do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - } while (!atomic_compare_exchange_weak_u64( - &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); - a0 = prof_idump_accumulated.accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - prof_idump_accumulated.accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); -#endif + return counter_rollback(tsdn, &prof_idump_accumulated, usize); } bool -- cgit v0.12 From 88b0e03a4e081d3d9c1bdf369345679f9e23b983 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 22:29:17 -0800 Subject: Implement opt.stats_interval and the _opts options. Add options stats_interval and stats_interval_opts to allow interval based stats printing. 
This provides an easy way to collect stats without code changes, because opt.stats_print may not work (some binaries never exit). --- Makefile.in | 1 + doc/jemalloc.xml.in | 35 +++++++++ include/jemalloc/internal/counter.h | 14 ++-- include/jemalloc/internal/emitter.h | 4 + include/jemalloc/internal/stats.h | 18 +++++ include/jemalloc/internal/thread_event.h | 6 +- include/jemalloc/internal/tsd.h | 4 + src/ctl.c | 6 ++ src/jemalloc.c | 82 +++++++++++--------- src/prof.c | 5 +- src/stats.c | 52 ++++++++++++- src/thread_event.c | 26 ++++++- test/unit/counter.c | 128 +++++++++++++++++++++++++++++++ test/unit/mallctl.c | 3 + 14 files changed, 334 insertions(+), 50 deletions(-) create mode 100644 test/unit/counter.c diff --git a/Makefile.in b/Makefile.in index 37941ea..eda9c7a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -191,6 +191,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 802c64a..1baf1f6 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1185,6 +1185,41 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", enabled. The default is . + + + opt.stats_interval + (int64_t) + r- + + Average interval between statistics outputs, as measured + in bytes of allocation activity. The actual interval may be sporadic + because decentralized event counters are used to avoid synchronization + bottlenecks. The output may be triggered on any thread, which then + calls malloc_stats_print(). opt.stats_interval_opts + can be combined to specify output options. By default, + interval-triggered stats output is disabled (encoded as + -1). + + + + + opt.stats_interval_opts + (const char *) + r- + + Options (the opts string) to pass + to the malloc_stats_print() for interval based + statistics printing (enabled + through opt.stats_interval). See + available options in malloc_stats_print(). + Has no effect unless opt.stats_interval is + enabled. The default is . + + opt.junk diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 302e350..4aee23d 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -6,11 +6,11 @@ typedef struct counter_accum_s { #ifndef JEMALLOC_ATOMIC_U64 malloc_mutex_t mtx; - uint64_t accumbytes; + uint64_t accumbytes; #else - atomic_u64_t accumbytes; + atomic_u64_t accumbytes; #endif - uint64_t interval; + uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool @@ -52,7 +52,7 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { } JEMALLOC_ALWAYS_INLINE void -counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { +counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { /* * Cancel out as much of the excessive accumbytes increase as possible * without underflowing. Interval-triggered events occur slightly more @@ -63,16 +63,14 @@ counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + a1 = (a0 >= bytes) ? 
a0 - bytes : 0; } while (!atomic_compare_exchange_weak_u64( &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &counter->mtx); a0 = counter->accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + a1 = (a0 >= bytes) ? a0 - bytes : 0; counter->accumbytes = a1; malloc_mutex_unlock(tsdn, &counter->mtx); #endif diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 009bf9a..c3f47b2 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -22,6 +22,7 @@ typedef enum emitter_type_e emitter_type_t; enum emitter_type_e { emitter_type_bool, emitter_type_int, + emitter_type_int64, emitter_type_unsigned, emitter_type_uint32, emitter_type_uint64, @@ -149,6 +150,9 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, case emitter_type_int: EMIT_SIMPLE(int, "%d") break; + case emitter_type_int64: + EMIT_SIMPLE(int64_t, "%" FMTd64) + break; case emitter_type_unsigned: EMIT_SIMPLE(unsigned, "%u") break; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 3b9e0ea..d1f5eab 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -24,8 +24,26 @@ enum { extern bool opt_stats_print; extern char opt_stats_print_opts[stats_print_tot_num_options+1]; +/* Utilities for stats_interval. */ +extern int64_t opt_stats_interval; +extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; + +#define STATS_INTERVAL_DEFAULT -1 +/* + * Batch-increment the counter to reduce synchronization overhead. Each thread + * merges after (interval >> LG_BATCH_SIZE) bytes of allocations; also limit the + * BATCH_MAX for accuracy when the interval is huge (which is expected). + */ +#define STATS_INTERVAL_ACCUM_LG_BATCH_SIZE 6 +#define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) + +uint64_t stats_interval_accum_batch_size(void); +bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); + /* Implements je_malloc_stats_print. */ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); +bool stats_boot(void); + #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3ceb470..454c689 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -36,7 +36,8 @@ void tsd_thread_event_init(tsd_t *tsd); */ #define ITERATE_OVER_ALL_EVENTS \ E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ - E(prof_sample, (config_prof && opt_prof)) + E(prof_sample, (config_prof && opt_prof)) \ + E(stats_interval, (opt_stats_interval >= 0)) #define E(event, condition) \ C(event##_event_wait) @@ -46,7 +47,8 @@ void tsd_thread_event_init(tsd_t *tsd); C(thread_allocated) \ C(thread_allocated_last_event) \ ITERATE_OVER_ALL_EVENTS \ - C(prof_sample_last_event) + C(prof_sample_last_event) \ + C(stats_interval_last_event) /* Getters directly wrap TSD getters. 
*/ #define C(counter) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3465a2d..576fa44 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -87,6 +87,8 @@ typedef void (*test_callback_t)(int *); O(tcache_gc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_event_wait, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -118,6 +120,8 @@ typedef void (*test_callback_t)(int *); /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ + /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* iarena */ NULL, \ diff --git a/src/ctl.c b/src/ctl.c index d357b38..78f5df2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,6 +96,8 @@ CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_stats_print_opts) +CTL_PROTO(opt_stats_interval) +CTL_PROTO(opt_stats_interval_opts) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) @@ -329,6 +331,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, + {NAME("stats_interval"), CTL(opt_stats_interval)}, + {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, {NAME("utrace"), CTL(opt_utrace)}, @@ -1791,6 +1795,8 @@ CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_GEN(opt_stats_print_opts, opt_stats_print_opts, const char *) +CTL_RO_NL_GEN(opt_stats_interval, opt_stats_interval, int64_t) +CTL_RO_NL_GEN(opt_stats_interval_opts, opt_stats_interval_opts, const char *) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 218e04a..1976791 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -775,8 +775,8 @@ malloc_ncpus(void) { } static void -init_opt_stats_print_opts(const char *v, size_t vlen) { - size_t opts_len = strlen(opt_stats_print_opts); +init_opt_stats_opts(const char *v, size_t vlen, char *dest) { + size_t opts_len = strlen(dest); assert(opts_len <= stats_print_tot_num_options); for (size_t i = 0; i < vlen; i++) { @@ -787,16 +787,16 @@ init_opt_stats_print_opts(const char *v, size_t vlen) { default: continue; } - if (strchr(opt_stats_print_opts, v[i]) != NULL) { + if (strchr(dest, v[i]) != NULL) { /* Ignore repeated. */ continue; } - opt_stats_print_opts[opts_len++] = v[i]; - opt_stats_print_opts[opts_len] = '\0'; + dest[opts_len++] = v[i]; + dest[opts_len] = '\0'; assert(opts_len <= stats_print_tot_num_options); } - assert(opts_len == strlen(opt_stats_print_opts)); + assert(opts_len == strlen(dest)); } /* Reads the next size pair in a multi-sized option. 
*/ @@ -1118,39 +1118,47 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_CHECK_MIN(um, min) ((um) < (min)) #define CONF_DONT_CHECK_MAX(um, max) false #define CONF_CHECK_MAX(um, max) ((um) > (max)) -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ - uintmax_t um; \ + max_t mv; \ char *end; \ \ set_errno(0); \ - um = malloc_strtoumax(v, &end, 0); \ + mv = (max_t)malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ - if (check_min(um, (t)(min))) { \ + if (check_min(mv, (t)(min))) { \ o = (t)(min); \ } else if ( \ - check_max(um, (t)(max))) { \ + check_max(mv, (t)(max))) { \ o = (t)(max); \ } else { \ - o = (t)um; \ + o = (t)mv; \ } \ } else { \ - if (check_min(um, (t)(min)) || \ - check_max(um, (t)(max))) { \ + if (check_min(mv, (t)(min)) || \ + check_max(mv, (t)(max))) { \ CONF_ERROR( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ } else { \ - o = (t)um; \ + o = (t)mv; \ } \ } \ CONF_CONTINUE; \ } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, \ + check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)\ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, \ + check_max, clip) + #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ CONF_HANDLE_T_U(unsigned, o, n, min, max, \ @@ -1158,27 +1166,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_U(size_t, o, n, min, max, \ check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (CONF_MATCH(n)) { \ - long l; \ - char *end; \ - \ - set_errno(0); \ - l = strtol(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } else if (l < (ssize_t)(min) || l > \ - (ssize_t)(max)) { \ - CONF_ERROR( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else { \ - o = l; \ - } \ - CONF_CONTINUE; \ - } + CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ + CONF_CHECK_MIN, CONF_CHECK_MAX, false) #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ @@ -1275,7 +1268,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], SSIZE_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_print_opts(v, vlen); + init_opt_stats_opts(v, vlen, + opt_stats_print_opts); + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_stats_interval, + "stats_interval", -1, INT64_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + if (CONF_MATCH("stats_interval_opts")) { + init_opt_stats_opts(v, vlen, + opt_stats_interval_opts); CONF_CONTINUE; } if (config_fill) { @@ -1463,7 +1465,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #undef CONF_CHECK_MIN #undef CONF_DONT_CHECK_MAX #undef CONF_CHECK_MAX +#undef CONF_HANDLE_T #undef CONF_HANDLE_T_U +#undef CONF_HANDLE_T_SIGNED #undef CONF_HANDLE_UNSIGNED #undef 
CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T @@ -1545,7 +1549,6 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); - thread_event_boot(); sz_boot(&sc_data); bin_info_boot(&sc_data, bin_shard_sizes); @@ -1558,6 +1561,10 @@ malloc_init_hard_a0_locked() { } } } + + if (stats_boot()) { + return true; + } if (pages_boot()) { return true; } @@ -1573,6 +1580,7 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } + thread_event_boot(); arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; diff --git a/src/prof.c b/src/prof.c index 649e9ca..0d29c68 100644 --- a/src/prof.c +++ b/src/prof.c @@ -571,7 +571,10 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { cassert(config_prof); - return counter_rollback(tsdn, &prof_idump_accumulated, usize); + /* Rollback is only done on arena_prof_promote of small sizes. */ + assert(SC_LARGE_MINCLASS > usize); + return counter_rollback(tsdn, &prof_idump_accumulated, + SC_LARGE_MINCLASS - usize); } bool diff --git a/src/stats.c b/src/stats.c index 41b990e..dd31032 100644 --- a/src/stats.c +++ b/src/stats.c @@ -50,6 +50,13 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { bool opt_stats_print = false; char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; +int64_t opt_stats_interval = STATS_INTERVAL_DEFAULT; +char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; + +static counter_accum_t stats_interval_accumulated; +/* Per thread batch accum size for stats_interval. */ +static uint64_t stats_interval_accum_batch; + /******************************************************************************/ static uint64_t @@ -1000,14 +1007,16 @@ stats_general_print(emitter_t *emitter) { unsigned uv; uint32_t u32v; uint64_t u64v; + int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, ssz, sssz, cpsz; + size_t sv, bsz, usz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); + i64sz = sizeof(int64_t); CTL_GET("version", &cpv, const char *); emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); @@ -1063,6 +1072,9 @@ stats_general_print(emitter_t *emitter) { #define OPT_WRITE_UNSIGNED(name) \ OPT_WRITE(name, uv, usz, emitter_type_unsigned) +#define OPT_WRITE_INT64(name) \ + OPT_WRITE(name, i64v, i64sz, emitter_type_int64) + #define OPT_WRITE_SIZE_T(name) \ OPT_WRITE(name, sv, ssz, emitter_type_size) #define OPT_WRITE_SSIZE_T(name) \ @@ -1109,6 +1121,10 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_leak") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_INT64("stats_interval") + OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") emitter_dict_end(emitter); @@ -1477,3 +1493,37 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); emitter_end(&emitter); } + +bool +stats_interval_accum(tsd_t *tsd, uint64_t bytes) { + return counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, bytes); +} + +uint64_t +stats_interval_accum_batch_size(void) { + return stats_interval_accum_batch; +} + +bool +stats_boot(void) { + uint64_t stats_interval; + if (opt_stats_interval < 0) { + assert(opt_stats_interval == -1); + stats_interval = 0; + stats_interval_accum_batch = 0; + } else{ + /* See comments in stats.h */ + stats_interval = 
(opt_stats_interval > 0) ? + opt_stats_interval : 1; + uint64_t batch = stats_interval >> + STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; + if (batch > STATS_INTERVAL_ACCUM_BATCH_MAX) { + batch = STATS_INTERVAL_ACCUM_BATCH_MAX; + } else if (batch == 0) { + batch = 1; + } + stats_interval_accum_batch = batch; + } + + return counter_accum_init(&stats_interval_accumulated, stats_interval); +} diff --git a/src/thread_event.c b/src/thread_event.c index 0657c84..6aedf16 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -25,6 +25,7 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +/* (Re)Init functions. */ static void tsd_thread_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -38,10 +39,18 @@ tsd_thread_prof_sample_event_init(tsd_t *tsd) { } static void +tsd_thread_stats_interval_event_init(tsd_t *tsd) { + assert(opt_stats_interval >= 0); + uint64_t interval = stats_interval_accum_batch_size(); + thread_stats_interval_event_update(tsd, interval); +} + +/* Handler functions. */ +static void thread_tcache_gc_event_handler(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); assert(tcache_gc_event_wait_get(tsd) == 0U); - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + tsd_thread_tcache_gc_event_init(tsd); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_event_hard(tsd, tcache); @@ -71,6 +80,21 @@ thread_prof_sample_event_handler(tsd_t *tsd) { } } +static void +thread_stats_interval_event_handler(tsd_t *tsd) { + assert(opt_stats_interval >= 0); + assert(stats_interval_event_wait_get(tsd) == 0U); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_stats_event = stats_interval_last_event_get(tsd); + stats_interval_last_event_set(tsd, last_event); + + if (stats_interval_accum(tsd, last_event - last_stats_event)) { + je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); + } + tsd_thread_stats_interval_event_init(tsd); +} +/* Per event facilities done. 
*/ + static uint64_t thread_allocated_next_event_compute(tsd_t *tsd) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; diff --git a/test/unit/counter.c b/test/unit/counter.c new file mode 100644 index 0000000..619510d --- /dev/null +++ b/test/unit/counter.c @@ -0,0 +1,128 @@ +#include "test/jemalloc_test.h" + +static const uint64_t interval = 1 << 20; + +TEST_BEGIN(test_counter_accum) { + uint64_t increment = interval >> 4; + unsigned n = interval / increment; + uint64_t accum = 0; + + counter_accum_t c; + counter_accum_init(&c, interval); + + tsd_t *tsd = tsd_fetch(); + bool trigger; + for (unsigned i = 0; i < n; i++) { + trigger = counter_accum(tsd_tsdn(tsd), &c, increment); + accum += increment; + if (accum < interval) { + assert_b_eq(trigger, false, "Should not trigger"); + } else { + assert_b_eq(trigger, true, "Should have triggered"); + } + } + assert_b_eq(trigger, true, "Should have triggered"); +} +TEST_END + +void +assert_counter_value(counter_accum_t *c, uint64_t v) { + uint64_t accum; +#ifdef JEMALLOC_ATOMIC_U64 + accum = atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); +#else + accum = c->accumbytes; +#endif + assert_u64_eq(accum, v, "Counter value mismatch"); +} + +TEST_BEGIN(test_counter_rollback) { + uint64_t half_interval = interval / 2; + + counter_accum_t c; + counter_accum_init(&c, interval); + + tsd_t *tsd = tsd_fetch(); + counter_rollback(tsd_tsdn(tsd), &c, half_interval); + + bool trigger; + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); + assert_counter_value(&c, 1); + + counter_rollback(tsd_tsdn(tsd), &c, 1); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, 1); + assert_counter_value(&c, half_interval - 1); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + assert_counter_value(&c, interval - 1); + + trigger = counter_accum(tsd_tsdn(tsd), &c, 1); + assert_b_eq(trigger, true, "Should have triggered"); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); + assert_b_eq(trigger, true, "Should have triggered"); + assert_counter_value(&c, 1); +} +TEST_END + +#define N_THDS (16) +#define N_ITER_THD (1 << 12) +#define ITER_INCREMENT (interval >> 4) + +static void * +thd_start(void *varg) { + counter_accum_t *c = (counter_accum_t *)varg; + + tsd_t *tsd = tsd_fetch(); + bool trigger; + uintptr_t n_triggered = 0; + for (unsigned i = 0; i < N_ITER_THD; i++) { + trigger = counter_accum(tsd_tsdn(tsd), c, ITER_INCREMENT); + n_triggered += trigger ? 
1 : 0; + } + + return (void *)n_triggered; +} + + +TEST_BEGIN(test_counter_mt) { + counter_accum_t shared_c; + counter_accum_init(&shared_c, interval); + + thd_t thds[N_THDS]; + unsigned i; + for (i = 0; i < N_THDS; i++) { + thd_create(&thds[i], thd_start, (void *)&shared_c); + } + + uint64_t sum = 0; + for (i = 0; i < N_THDS; i++) { + void *ret; + thd_join(thds[i], &ret); + sum += (uintptr_t)ret; + } + assert_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), + "Incorrect number of triggers"); +} +TEST_END + +int +main(void) { + return test( + test_counter_accum, + test_counter_rollback, + test_counter_mt); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index da1716a..14c169b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -170,6 +170,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); TEST_MALLCTL_OPT(bool, stats_print, always); + TEST_MALLCTL_OPT(const char *, stats_print_opts, always); + TEST_MALLCTL_OPT(int64_t, stats_interval, always); + TEST_MALLCTL_OPT(const char *, stats_interval_opts, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); -- cgit v0.12 From 38a48e5741faf51548f5b750c0ab6eba8eb67a0c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 16 Jan 2020 13:00:35 -0800 Subject: Set reentrancy to 1 for tsd_state_purgatory. Reentrancy is already set for other non-nominal tsd states (reincarnated and minimal_initialized). Add purgatory to be safe and consistent. --- include/jemalloc/internal/tsd.h | 5 ++++- src/tsd.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 576fa44..a62793a 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -414,7 +414,10 @@ tsd_fetch(void) { static inline bool tsd_nominal(tsd_t *tsd) { - return (tsd_state_get(tsd) <= tsd_state_nominal_max); + bool nominal = tsd_state_get(tsd) <= tsd_state_nominal_max; + assert(nominal || tsd_reentrancy_level_get(tsd) > 0); + + return nominal; } JEMALLOC_ALWAYS_INLINE tsdn_t * diff --git a/src/tsd.c b/src/tsd.c index 17e9eed..940ff7d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -376,6 +376,7 @@ tsd_do_data_cleanup(tsd_t *tsd) { arenas_tdata_cleanup(tsd); tcache_cleanup(tsd); witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); + *tsd_reentrancy_levelp_get(tsd) = 1; } void -- cgit v0.12 From 0f552ed673b26b733a290bcac4c4d8ff4d0344e1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 16:18:32 -0800 Subject: Don't purge huge extents when decay is off. 
--- include/jemalloc/internal/arena_inlines_b.h | 6 ++++++ src/extent.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a310eb2..844e045 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -103,6 +103,12 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { large_prof_info_set(edata, tctx); } +JEMALLOC_ALWAYS_INLINE bool +arena_may_force_decay(arena_t *arena) { + return !(arena_dirty_decay_ms_get(arena) == -1 + || arena_muzzy_decay_ms_get(arena) == -1); +} + JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; diff --git a/src/extent.c b/src/extent.c index 9779c38..07c0bd2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1226,7 +1226,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, &arena->edata_cache, ehooks, rtree_ctx, ecache, edata, &coalesced, growing_retained); } while (coalesced); - if (edata_size_get(edata) >= oversize_threshold) { + if (edata_size_get(edata) >= oversize_threshold && + arena_may_force_decay(arena)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); arena_decay_extent(tsdn, arena, ehooks, edata); -- cgit v0.12 From 88d9eca8483f39ded261c897e95e7d4459775c28 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Jan 2020 17:32:45 -0800 Subject: Enforce page alignment for sampled allocations. This allows sampled allocations to be checked through alignment, therefore enable sized deallocation regardless of cache_oblivious. --- include/jemalloc/internal/prof_inlines_b.h | 17 +++++++ src/jemalloc.c | 82 ++++++++++++++++-------------- test/integration/extent.c | 2 +- 3 files changed, 62 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index d0cc48d..c53dac5 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -197,6 +197,22 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } } +JEMALLOC_ALWAYS_INLINE size_t +prof_sample_align(size_t orig_align) { + /* + * Enforce page alignment, so that sampled allocations can be identified + * w/o metadata lookup. + */ + assert(opt_prof); + return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + orig_align; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; @@ -206,6 +222,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { + assert(prof_sample_aligned(ptr)); prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1976791..bac050a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2013,6 +2013,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, szind_t ind_large; size_t bumped_usize = usize; + dopts->alignment = prof_sample_align(dopts->alignment); if (usize <= SC_SMALL_MAXCLASS) { assert(((dopts->alignment == 0) ? 
sz_s2u(SC_LARGE_MINCLASS) : @@ -2029,6 +2030,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, } else { ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); } + assert(prof_sample_aligned(ret)); return ret; } @@ -2598,32 +2600,42 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { - /* - * When cache_oblivious is disabled and ptr is not page aligned, - * the allocation was not sampled -- usize can be used to - * determine szind directly. - */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = true; - ctx = &alloc_ctx; - if (config_debug) { - alloc_ctx_t dbg_ctx; + if (!config_prof) { + /* Means usize will be used to determine szind. */ + ctx = NULL; + } else { + if (likely(!prof_sample_aligned(ptr))) { + ctx = &alloc_ctx; + /* + * When the ptr is not page aligned, it was not sampled. + * usize can be trusted to determine szind and slab. + */ + ctx->szind = sz_size2index(usize); + if (config_cache_oblivious) { + ctx->slab = (ctx->szind < SC_NBINS); + } else { + /* Non page aligned must be slab allocated. */ + ctx->slab = true; + } + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), + &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &dbg_ctx.szind, &dbg_ctx.slab); + assert(dbg_ctx.szind == ctx->szind); + assert(dbg_ctx.slab == ctx->slab); + } + } else if (opt_prof) { + ctx = &alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, - rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, - &dbg_ctx.slab); - assert(dbg_ctx.szind == alloc_ctx.szind); - assert(dbg_ctx.slab == alloc_ctx.slab); + rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, + &ctx->slab); + assert(ctx->szind == sz_size2index(usize)); + } else { + ctx = NULL; } - } else if (config_prof && opt_prof) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind == sz_size2index(usize)); - ctx = &alloc_ctx; - } else { - ctx = NULL; } if (config_prof && opt_prof) { @@ -2683,13 +2695,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } szind_t szind; - /* - * If !config_cache_oblivious, we can check PAGE alignment to - * detect sampled objects. Otherwise addresses are - * randomized, and we have to look it up in the rtree anyway. - * See also isfree(). - */ - if (!size_hint || config_cache_oblivious) { + if (!size_hint) { bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), @@ -2707,7 +2713,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * sampled object check will also check for null ptr. 
*/ if (unlikely(size > SC_LOOKUP_MAXCLASS || - (((uintptr_t)ptr & PAGE_MASK) == 0))) { + (config_prof && prof_sample_aligned(ptr)))) { return false; } szind = sz_size2index_lookup(size); @@ -3024,6 +3030,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (tctx == NULL) { return NULL; } + + alignment = prof_sample_align(alignment); if (usize <= SC_SMALL_MAXCLASS) { p = iralloct(tsdn, old_ptr, old_usize, SC_LARGE_MINCLASS, alignment, zero, tcache, @@ -3036,6 +3044,7 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, tcache, arena, hook_args); } + assert(prof_sample_aligned(p)); return p; } @@ -3281,15 +3290,13 @@ ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, static size_t ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { - size_t usize; - - if (tctx == NULL) { + /* Sampled allocation needs to be page aligned. */ + if (tctx == NULL || !prof_sample_aligned(ptr)) { return old_usize; } - usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); - return usize; + return ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, + zero); } JEMALLOC_ALWAYS_INLINE size_t @@ -3590,7 +3597,6 @@ sdallocx_default(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); - } JEMALLOC_EXPORT void JEMALLOC_NOTHROW diff --git a/test/integration/extent.c b/test/integration/extent.c index b5db087..a75ba03 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -59,8 +59,8 @@ test_extent_body(unsigned arena_ind) { assert_true(called_decommit, "Expected decommit call"); assert_true(did_purge_lazy || did_purge_forced, "Expected purge"); + assert_true(called_split, "Expected split call"); } - assert_true(called_split, "Expected split call"); dallocx(p, flags); try_dalloc = true; -- cgit v0.12 From 974222c626b351256f071d18994c70b79d10a627 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2020 14:35:54 -0800 Subject: Add safety check on sdallocx slow / sampled path. --- include/jemalloc/internal/safety_check.h | 1 + src/jemalloc.c | 6 +++++- src/safety_check.c | 12 ++++++++++++ src/tcache.c | 5 +---- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index ec4b336..a7a4433 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H +void safety_check_fail_sized_dealloc(bool current_dealloc); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. */ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index bac050a..5f11fc3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2632,7 +2632,11 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, &ctx->slab); - assert(ctx->szind == sz_size2index(usize)); + /* Small alloc may have !slab (sampled). 
*/ + bool sz_correct = (ctx->szind == sz_size2index(usize)); + if (config_opt_safety_checks && !sz_correct) { + safety_check_fail_sized_dealloc(true); + } } else { ctx = NULL; } diff --git a/src/safety_check.c b/src/safety_check.c index 804155d..a83dca7 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,6 +3,18 @@ static void (*safety_check_abort)(const char *message); +void safety_check_fail_sized_dealloc(bool current_dealloc) { + assert(config_opt_safety_checks); + char *src = current_dealloc ? "the current pointer being freed" : + "in thread cache, possibly from previous deallocations"; + + safety_check_fail(": size mismatch detected, likely caused by" + " application sized deallocation bugs (source: %s). Suggest building" + "with --enable-debug or address sanitizer for debugging. Abort.\n", + src); + abort(); +} + void safety_check_set_abort(void (*abort_fn)(const char *)) { safety_check_abort = abort_fn; } diff --git a/src/tcache.c b/src/tcache.c index 0a511e2..2f4ca5a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -135,10 +135,7 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, sz_sum -= szind; } if (sz_sum != 0) { - safety_check_fail(": size mismatch in thread cache " - "detected, likely caused by sized deallocation bugs by " - "application. Abort.\n"); - abort(); + safety_check_fail_sized_dealloc(false); } } -- cgit v0.12 From 536ea6858ecfcac49060c805231bd1722d84a0cf Mon Sep 17 00:00:00 2001 From: zoulasc Date: Mon, 3 Feb 2020 15:35:08 -0500 Subject: NetBSD specific changes: - NetBSD overcommits - When mapping pages, use the maximum of the alignment requested and the compiled-in PAGE constant which might be greater than the current kernel pagesize, since we compile binaries with the maximum page size supported by the architecture (so that they work with all kernels). --- src/pages.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/pages.c b/src/pages.c index 75c8dd9..62e84f0 100644 --- a/src/pages.c +++ b/src/pages.c @@ -14,6 +14,9 @@ #include #endif #endif +#ifdef __NetBSD__ +#include /* ilog2 */ +#endif /******************************************************************************/ /* Data. */ @@ -74,6 +77,18 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { * of existing mappings, and we only want to create new mappings. */ { +#ifdef __NetBSD__ + /* + * On NetBSD PAGE for a platform is defined to the + * maximum page size of all machine architectures + * for that platform, so that we can use the same + * binaries across all machine architectures. + */ + if (alignment > os_page || PAGE > os_page) { + unsigned int a = ilog2(MAX(alignment, PAGE)); + mmap_flags |= MAP_ALIGNED(a); + } +#endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; ret = mmap(addr, size, prot, mmap_flags, -1, 0); @@ -622,6 +637,8 @@ pages_boot(void) { mmap_flags |= MAP_NORESERVE; } # endif +#elif defined(__NetBSD__) + os_overcommits = true; #else os_overcommits = false; #endif -- cgit v0.12 From 97dd79db6c4f9b93bb83182afb191d8dbef49806 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Jan 2020 21:12:06 -0800 Subject: Implement deallocation events. Make the event module to accept two event types, and pass around the event context. Use bytes-based events to trigger tcache GC on deallocation, and get rid of the tcache ticker. 
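
A minimal sketch of the bytes-based trigger on the deallocation path (simplified: the real implementation keeps these counters in tsd, supports multiple registered events, and folds the threshold check into the free fast path; the names and the interval constant below are illustrative only):

    #include <stdint.h>

    #define SKETCH_GC_INCR_BYTES ((uint64_t)65536)	/* stand-in interval */

    typedef struct {
    	uint64_t deallocated;	/* running count of freed bytes */
    	uint64_t next_event;	/* threshold that triggers the next GC */
    } dalloc_event_sketch_t;

    static void
    sketch_dalloc_event(dalloc_event_sketch_t *ev, uint64_t usize) {
    	ev->deallocated += usize;
    	if (ev->deallocated >= ev->next_event) {
    		/* Event fired: run tcache GC, then advance the threshold. */
    		ev->next_event = ev->deallocated + SKETCH_GC_INCR_BYTES;
    	}
    }

Compared with the removed per-operation ticker, this keys tcache GC off the number of bytes freed rather than the number of deallocation calls.
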
--- include/jemalloc/internal/tcache_inlines.h | 16 --- include/jemalloc/internal/tcache_structs.h | 3 - include/jemalloc/internal/thread_event.h | 166 +++++++++++++++++++++-------- include/jemalloc/internal/tsd.h | 8 ++ src/jemalloc.c | 65 ++++++----- src/tcache.c | 2 - src/thread_event.c | 161 ++++++++++++++++++---------- src/tsd.c | 2 +- test/unit/thread_event.c | 44 +++++--- 9 files changed, 301 insertions(+), 166 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 40c4286..d356181 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" static inline bool @@ -27,17 +26,6 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } -JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) { - if (TCACHE_GC_INCR == 0) { - return; - } - - if (unlikely(ticker_tick(&tcache->gc_ticker))) { - tcache_event_hard(tsd, tcache); - } -} - JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -171,8 +159,6 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } - - tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void @@ -195,8 +181,6 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } - - tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE tcache_t * diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 98d3ef7..38a82fe 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -16,9 +16,6 @@ struct tcache_s { * together at the start of this struct. */ - /* Drives incremental GC. */ - ticker_t gc_ticker; - /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 454c689..33cbcbe 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -4,42 +4,51 @@ #include "jemalloc/internal/tsd.h" /* - * Maximum threshold on thread_allocated_next_event_fast, so that there is no - * need to check overflow in malloc fast path. (The allocation size in malloc + * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is + * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) */ -#define THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX \ +#define THREAD_NEXT_EVENT_FAST_MAX \ (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) /* * The max interval helps make sure that malloc stays on the fast path in the - * common case, i.e. thread_allocated < thread_allocated_next_event_fast. - * When thread_allocated is within an event's distance to - * THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX above, thread_allocated_next_event_fast - * is wrapped around and we fall back to the medium-fast path. 
The max interval - * makes sure that we're not staying on the fallback case for too long, even if - * there's no active event or if all active events have long wait times. + * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When + * thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX + * above, thread_allocated_next_event_fast is wrapped around and we fall back to + * the medium-fast path. The max interval makes sure that we're not staying on + * the fallback case for too long, even if there's no active event or if all + * active events have long wait times. */ #define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) +typedef struct event_ctx_s { + bool is_alloc; + uint64_t *current; + uint64_t *last_event; + uint64_t *next_event; + uint64_t *next_event_fast; +} event_ctx_t; + void thread_event_assert_invariants_debug(tsd_t *tsd); -void thread_event_trigger(tsd_t *tsd, bool delay_event); -void thread_event_rollback(tsd_t *tsd, size_t diff); -void thread_event_update(tsd_t *tsd); +void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); +void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); +void thread_event_update(tsd_t *tsd, bool alloc_event); void thread_event_boot(); void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); /* * List of all events, in the following format: - * E(event, (condition)) + * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ - E(prof_sample, (config_prof && opt_prof)) \ - E(stats_interval, (opt_stats_interval >= 0)) + E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0), true) \ + E(prof_sample, (config_prof && opt_prof), true) \ + E(stats_interval, (opt_stats_interval >= 0), true) \ + E(tcache_gc_dalloc, (TCACHE_GC_INCR_BYTES > 0), false) -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) /* List of all thread event counters. */ @@ -83,9 +92,9 @@ ITERATE_OVER_ALL_COUNTERS #undef E /* - * Two malloc fastpath getters -- use the unsafe getters since tsd may be - * non-nominal, in which case the fast_threshold will be set to 0. This allows - * checking for events and tsd non-nominal in a single branch. + * The malloc and free fastpath getters -- use the unsafe getters since tsd may + * be non-nominal, in which case the fast_threshold will be set to 0. This + * allows checking for events and tsd non-nominal in a single branch. * * Note that these can only be used on the fastpath. */ @@ -97,42 +106,83 @@ thread_allocated_malloc_fastpath(tsd_t *tsd) { JEMALLOC_ALWAYS_INLINE uint64_t thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); return v; } +JEMALLOC_ALWAYS_INLINE void +thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, + uint64_t *threshold, bool size_hint) { + if (!size_hint) { + *deallocated = tsd_thread_deallocated_get(tsd); + *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); + } else { + /* Unsafe getters since this may happen before tsd_init. 
*/ + *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); + *threshold = + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); + } + assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX); +} + +JEMALLOC_ALWAYS_INLINE bool +event_ctx_is_alloc(event_ctx_t *ctx) { + return ctx->is_alloc; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +event_ctx_current_bytes_get(event_ctx_t *ctx) { + return *ctx->current; +} + +JEMALLOC_ALWAYS_INLINE void +event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) { + *ctx->current = v; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +event_ctx_last_event_get(event_ctx_t *ctx) { + return *ctx->last_event; +} + +JEMALLOC_ALWAYS_INLINE void +event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) { + *ctx->last_event = v; +} + /* Below 3 for next_event_fast. */ JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_fast_get(tsd_t *tsd) { - uint64_t v = tsd_thread_allocated_next_event_fast_get(tsd); - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); +event_ctx_next_event_fast_get(event_ctx_t *ctx) { + uint64_t v = *ctx->next_event_fast; + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); return v; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_fast_set(tsd_t *tsd, uint64_t v) { - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - *tsd_thread_allocated_next_event_fastp_get(tsd) = v; +event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) { + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); + *ctx->next_event_fast = v; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_fast_set_non_nominal(tsd_t *tsd) { +thread_next_event_fast_set_non_nominal(tsd_t *tsd) { /* - * Set the fast threshold to zero when tsd is non-nominal. Use the + * Set the fast thresholds to zero when tsd is non-nominal. Use the * unsafe getter as this may get called during tsd init and clean up. */ *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd) = 0; + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) = 0; } /* For next_event. Setter also updates the fast threshold. */ JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_get(tsd_t *tsd) { - return tsd_thread_allocated_next_event_get(tsd); +event_ctx_next_event_get(event_ctx_t *ctx) { + return *ctx->next_event; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { - *tsd_thread_allocated_next_eventp_get(tsd) = v; +event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { + *ctx->next_event = v; thread_event_recompute_fast_threshold(tsd); } @@ -145,8 +195,8 @@ thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { * at the end will restore the invariants), * (b) thread_##event##_event_update() (the thread_event_update() call at the * end will restore the invariants), or - * (c) thread_event_rollback() if the rollback falls below the last_event (the - * thread_event_update() call at the end will restore the invariants). + * (c) thread_alloc_event_rollback() if the rollback falls below the last_event + * (the thread_event_update() call at the end will restore the invariants). 
*/ JEMALLOC_ALWAYS_INLINE void thread_event_assert_invariants(tsd_t *tsd) { @@ -156,22 +206,52 @@ thread_event_assert_invariants(tsd_t *tsd) { } JEMALLOC_ALWAYS_INLINE void -thread_event(tsd_t *tsd, size_t usize) { +event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { + ctx->is_alloc = is_alloc; + if (is_alloc) { + ctx->current = tsd_thread_allocatedp_get(tsd); + ctx->last_event = tsd_thread_allocated_last_eventp_get(tsd); + ctx->next_event = tsd_thread_allocated_next_eventp_get(tsd); + ctx->next_event_fast = + tsd_thread_allocated_next_event_fastp_get(tsd); + } else { + ctx->current = tsd_thread_deallocatedp_get(tsd); + ctx->last_event = tsd_thread_deallocated_last_eventp_get(tsd); + ctx->next_event = tsd_thread_deallocated_next_eventp_get(tsd); + ctx->next_event_fast = + tsd_thread_deallocated_next_event_fastp_get(tsd); + } +} + +JEMALLOC_ALWAYS_INLINE void +thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { thread_event_assert_invariants(tsd); - uint64_t thread_allocated_before = thread_allocated_get(tsd); - thread_allocated_set(tsd, thread_allocated_before + usize); + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, is_alloc); + + uint64_t bytes_before = event_ctx_current_bytes_get(&ctx); + event_ctx_current_bytes_set(&ctx, bytes_before + usize); /* The subtraction is intentionally susceptible to underflow. */ - if (likely(usize < thread_allocated_next_event_get(tsd) - - thread_allocated_before)) { + if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) { thread_event_assert_invariants(tsd); } else { - thread_event_trigger(tsd, false); + thread_event_trigger(tsd, &ctx, false); } } -#define E(event, condition) \ +JEMALLOC_ALWAYS_INLINE void +thread_dalloc_event(tsd_t *tsd, size_t usize) { + thread_event_advance(tsd, usize, false); +} + +JEMALLOC_ALWAYS_INLINE void +thread_alloc_event(tsd_t *tsd, size_t usize) { + thread_event_advance(tsd, usize, true); +} + +#define E(event, condition, is_alloc) \ JEMALLOC_ALWAYS_INLINE void \ thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ thread_event_assert_invariants(tsd); \ @@ -188,7 +268,7 @@ thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ event_wait = THREAD_EVENT_MAX_START_WAIT; \ } \ event##_event_wait_set(tsd, event_wait); \ - thread_event_update(tsd); \ + thread_event_update(tsd, is_alloc); \ } ITERATE_OVER_ALL_EVENTS diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index a62793a..6868ce4 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -81,10 +81,14 @@ typedef void (*test_callback_t)(int *); O(thread_allocated, uint64_t, uint64_t) \ O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ O(tcache_gc_event_wait, uint64_t, uint64_t) \ + O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ @@ -114,10 +118,14 @@ typedef void (*test_callback_t)(int *); /* thread_allocated */ 0, \ /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ /* 
rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 5f11fc3..60565df 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2154,7 +2154,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); /* * If dopts->alignment > 0, then ind is still 0, but usize was computed @@ -2181,7 +2181,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } @@ -2191,7 +2191,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_oom; } } @@ -2575,7 +2575,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, @@ -2584,6 +2583,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } + thread_dalloc_event(tsd, usize); } JEMALLOC_ALWAYS_INLINE void @@ -2645,14 +2645,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, ctx); } - - *tsd_thread_deallocatedp_get(tsd) += usize; - if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); } else { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); } + thread_dalloc_event(tsd, usize); } JEMALLOC_NOINLINE @@ -2694,12 +2692,12 @@ free_default(void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - if (unlikely(!tsd || !tsd_fast(tsd))) { - return false; - } szind_t szind; if (!size_hint) { + if (unlikely(!tsd || !tsd_fast(tsd))) { + return false; + } bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), @@ -2712,6 +2710,15 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { assert(szind != SC_NSIZES); } else { /* + * The size hinted fastpath does not involve rtree lookup, thus + * can tolerate an uninitialized tsd. This allows the tsd_fast + * check to be folded into the branch testing fast_threshold + * (set to 0 when !tsd_fast). + */ + if (unlikely(!tsd)) { + return false; + } + /* * Check for both sizes that are too large, and for sampled * objects. Sampled objects are always page-aligned. The * sampled object check will also check for null ptr. 
@@ -2722,19 +2729,26 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } szind = sz_size2index_lookup(size); } + uint64_t deallocated, threshold; + thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); - tcache_t *tcache = tsd_tcachep_get(tsd); - if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + size_t usize = sz_index2size(szind); + uint64_t deallocated_after = deallocated + usize; + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. + */ + if (unlikely(deallocated_after >= threshold)) { return false; } + tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } - size_t usize = sz_index2size(szind); - *tsd_thread_deallocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; return true; } @@ -3144,11 +3158,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_oom; } } else { @@ -3158,11 +3172,10 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - - *tsd_thread_deallocatedp_get(tsd) += old_usize; + thread_dalloc_event(tsd, old_usize); UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3337,7 +3350,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } - thread_event(tsd, usize_max); + thread_alloc_event(tsd, usize_max); bool prof_active = prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -3350,7 +3363,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, extra, alignment, zero); } if (usize <= usize_max) { - thread_event_rollback(tsd, usize_max - usize); + thread_alloc_event_rollback(tsd, usize_max - usize); } else { /* * For downsizing request, usize_max can be less than usize. @@ -3359,7 +3372,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * to xallocx(), the entire usize will be rolled back if it's * equal to the old usize. 
*/ - thread_event(tsd, usize - usize_max); + thread_alloc_event(tsd, usize - usize_max); } /* @@ -3438,7 +3451,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); } /* @@ -3448,12 +3461,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); if (unlikely(usize == old_usize)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_not_resized; } - - *tsd_thread_deallocatedp_get(tsd) += old_usize; - + thread_dalloc_event(tsd, old_usize); label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; diff --git a/src/tcache.c b/src/tcache.c index 2f4ca5a..3d96512 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -437,8 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { tcache->next_gc_bin = 0; tcache->arena = NULL; - ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); diff --git a/src/thread_event.c b/src/thread_event.c index 6aedf16..5bdc4ae 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -12,14 +12,14 @@ static bool thread_event_active = false; /* TSD event init function signatures. */ -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ static void tsd_thread_##event##_event_init(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* Event handler function signatures. */ -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS @@ -33,6 +33,12 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) { } static void +tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); +} + +static void tsd_thread_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); prof_sample_threshold_update(tsd); @@ -46,11 +52,10 @@ tsd_thread_stats_interval_event_init(tsd_t *tsd) { } /* Handler functions. */ + static void -thread_tcache_gc_event_handler(tsd_t *tsd) { +tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - assert(tcache_gc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_event_init(tsd); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_event_hard(tsd, tcache); @@ -58,6 +63,20 @@ thread_tcache_gc_event_handler(tsd_t *tsd) { } static void +thread_tcache_gc_event_handler(tsd_t *tsd) { + assert(tcache_gc_event_wait_get(tsd) == 0U); + tsd_thread_tcache_gc_event_init(tsd); + tcache_gc_event(tsd); +} + +static void +thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) { + assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); + tsd_thread_tcache_gc_dalloc_event_init(tsd); + tcache_gc_event(tsd); +} + +static void thread_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); assert(prof_sample_event_wait_get(tsd) == 0U); @@ -96,12 +115,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) { /* Per event facilities done. 
*/ static uint64_t -thread_allocated_next_event_compute(tsd_t *tsd) { +thread_next_event_compute(tsd_t *tsd, bool is_alloc) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; bool no_event_on = true; -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ no_event_on = false; \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ @@ -119,15 +138,15 @@ thread_allocated_next_event_compute(tsd_t *tsd) { return wait; } -void -thread_event_assert_invariants_debug(tsd_t *tsd) { - uint64_t thread_allocated = thread_allocated_get(tsd); - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t next_event = thread_allocated_next_event_get(tsd); - uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); +static void +thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { + uint64_t current_bytes = event_ctx_current_bytes_get(ctx); + uint64_t last_event = event_ctx_last_event_get(ctx); + uint64_t next_event = event_ctx_next_event_get(ctx); + uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx); assert(last_event != next_event); - if (next_event > THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX || + if (next_event > THREAD_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) { assert(next_event_fast == 0U); } else { @@ -138,10 +157,9 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { uint64_t interval = next_event - last_event; /* The subtraction is intentionally susceptible to underflow. */ - assert(thread_allocated - last_event < interval); - - uint64_t min_wait = thread_allocated_next_event_compute(tsd); - + assert(current_bytes - last_event < interval); + uint64_t min_wait = thread_next_event_compute(tsd, + event_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is * on and the TSD is just initialized. The last_event == 0U guard @@ -153,6 +171,16 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } +void +thread_event_assert_invariants_debug(tsd_t *tsd) { + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + thread_event_assert_invariants_impl(tsd, &ctx); + + event_ctx_get(tsd, &ctx, false); + thread_event_assert_invariants_impl(tsd, &ctx); +} + /* * Synchronization around the fast threshold in tsd -- * There are two threads to consider in the synchronization here: @@ -200,39 +228,50 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { * of the owner thread's next_event_fast, but that's always safe (it just sends * it down the slow path earlier). */ +static void +event_ctx_next_event_fast_update(event_ctx_t *ctx) { + uint64_t next_event = event_ctx_next_event_get(ctx); + uint64_t next_event_fast = (next_event <= + THREAD_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + event_ctx_next_event_fast_set(ctx, next_event_fast); +} + void thread_event_recompute_fast_threshold(tsd_t *tsd) { if (tsd_state_get(tsd) != tsd_state_nominal) { /* Check first because this is also called on purgatory. */ - thread_allocated_next_event_fast_set_non_nominal(tsd); + thread_next_event_fast_set_non_nominal(tsd); return; } - uint64_t next_event = thread_allocated_next_event_get(tsd); - uint64_t next_event_fast = (next_event <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? 
next_event : 0U; - thread_allocated_next_event_fast_set(tsd, next_event_fast); + + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + event_ctx_next_event_fast_update(&ctx); + event_ctx_get(tsd, &ctx, false); + event_ctx_next_event_fast_update(&ctx); atomic_fence(ATOMIC_SEQ_CST); if (tsd_state_get(tsd) != tsd_state_nominal) { - thread_allocated_next_event_fast_set_non_nominal(tsd); + thread_next_event_fast_set_non_nominal(tsd); } } static void -thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { +thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx, + uint64_t wait) { assert(wait <= THREAD_EVENT_MAX_START_WAIT); - uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= + uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <= THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); - thread_allocated_next_event_set(tsd, next_event); + event_ctx_next_event_set(tsd, ctx, next_event); } static uint64_t thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, - bool allow_event_trigger) { + bool is_alloc, bool allow_event_trigger) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ @@ -267,28 +306,30 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, } void -thread_event_trigger(tsd_t *tsd, bool delay_event) { +thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) { /* usize has already been added to thread_allocated. */ - uint64_t thread_allocated_after = thread_allocated_get(tsd); + uint64_t bytes_after = event_ctx_current_bytes_get(ctx); /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = thread_allocated_after - - thread_allocated_last_event_get(tsd); + uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx); /* Make sure that accumbytes cannot overflow uint64_t. */ assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - thread_allocated_last_event_set(tsd, thread_allocated_after); + event_ctx_last_event_set(ctx, bytes_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; + + bool is_alloc = ctx->is_alloc; uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, - allow_event_trigger); - thread_event_adjust_thresholds_helper(tsd, wait); + is_alloc, allow_event_trigger); + thread_event_adjust_thresholds_helper(tsd, ctx, wait); thread_event_assert_invariants(tsd); -#define E(event, condition) \ - if (condition && event##_event_wait_get(tsd) == 0U) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition && \ + event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ thread_##event##_event_handler(tsd); \ } @@ -300,19 +341,23 @@ thread_event_trigger(tsd_t *tsd, bool delay_event) { } void -thread_event_rollback(tsd_t *tsd, size_t diff) { +thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { thread_event_assert_invariants(tsd); if (diff == 0U) { return; } - uint64_t thread_allocated = thread_allocated_get(tsd); + /* Rollback happens only on alloc events. */ + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ uint64_t thread_allocated_rollback = thread_allocated - diff; - thread_allocated_set(tsd, thread_allocated_rollback); + event_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_event = event_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ if (thread_allocated_rollback - last_event <= thread_allocated - last_event) { @@ -320,14 +365,14 @@ thread_event_rollback(tsd_t *tsd, size_t diff) { return; } - thread_allocated_last_event_set(tsd, thread_allocated_rollback); + event_ctx_last_event_set(&ctx, thread_allocated_rollback); /* The subtraction is intentionally susceptible to underflow. */ uint64_t wait_diff = last_event - thread_allocated_rollback; assert(wait_diff <= diff); -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (alloc_event == true && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ if (event_wait > 0U) { \ @@ -347,27 +392,29 @@ thread_event_rollback(tsd_t *tsd, size_t diff) { ITERATE_OVER_ALL_EVENTS #undef E - thread_event_update(tsd); + thread_event_update(tsd, true); } void -thread_event_update(tsd_t *tsd) { - uint64_t wait = thread_allocated_next_event_compute(tsd); - thread_event_adjust_thresholds_helper(tsd, wait); +thread_event_update(tsd_t *tsd, bool is_alloc) { + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, is_alloc); - uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t wait = thread_next_event_compute(tsd, is_alloc); + thread_event_adjust_thresholds_helper(tsd, &ctx, wait); + uint64_t last_event = event_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ - if (thread_allocated_get(tsd) - last_event >= - thread_allocated_next_event_get(tsd) - last_event) { - thread_event_trigger(tsd, true); + if (event_ctx_current_bytes_get(&ctx) - last_event >= + event_ctx_next_event_get(&ctx) - last_event) { + thread_event_trigger(tsd, &ctx, true); } else { thread_event_assert_invariants(tsd); } } void thread_event_boot() { -#define E(event, condition) \ +#define E(event, condition, ignored) \ if (condition) { \ thread_event_active = true; \ } @@ -377,7 +424,7 @@ void thread_event_boot() { } void tsd_thread_event_init(tsd_t *tsd) { -#define E(event, condition) \ +#define E(event, condition, is_alloc_event_unused) \ if (condition) { \ tsd_thread_##event##_event_init(tsd); \ } diff --git a/src/tsd.c b/src/tsd.c index 940ff7d..54e5b4a 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -119,7 +119,7 @@ tsd_force_recompute(tsdn_t *tsdn) { tsd_state_nominal_recompute, ATOMIC_RELAXED); /* See comments in thread_event_recompute_fast_threshold(). 
*/ atomic_fence(ATOMIC_SEQ_CST); - thread_allocated_next_event_fast_set_non_nominal(remote_tsd); + thread_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index f016cc5..db2d637 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -2,14 +2,18 @@ TEST_BEGIN(test_next_event_fast_roll_back) { tsd_t *tsd = tsd_fetch(); - thread_allocated_last_event_set(tsd, 0); - thread_allocated_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); - thread_allocated_next_event_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); -#define E(event, condition) \ - event##_event_wait_set(tsd, \ - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + event_ctx_last_event_set(&ctx, 0); + event_ctx_current_bytes_set(&ctx, + THREAD_NEXT_EVENT_FAST_MAX - 8U); + event_ctx_next_event_set(tsd, &ctx, + THREAD_NEXT_EVENT_FAST_MAX); +#define E(event, condition, is_alloc) \ + if (is_alloc && condition) { \ + event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\ + } ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); @@ -20,14 +24,20 @@ TEST_END TEST_BEGIN(test_next_event_fast_resume) { tsd_t *tsd = tsd_fetch(); - thread_allocated_last_event_set(tsd, 0); - thread_allocated_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); - thread_allocated_next_event_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); -#define E(event, condition) \ - event##_event_wait_set(tsd, \ - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + event_ctx_last_event_set(&ctx, 0); + event_ctx_current_bytes_set(&ctx, + THREAD_NEXT_EVENT_FAST_MAX + 8U); + event_ctx_next_event_set(tsd, &ctx, + THREAD_NEXT_EVENT_FAST_MAX + 16U); +#define E(event, condition, is_alloc) \ + if (is_alloc && condition) { \ + event##_event_wait_set(tsd, \ + THREAD_NEXT_EVENT_FAST_MAX + 16U); \ + } ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); @@ -42,7 +52,7 @@ TEST_BEGIN(test_event_rollback) { size_t count = 10; uint64_t thread_allocated = thread_allocated_get(tsd); while (count-- != 0) { - thread_event_rollback(tsd, diff); + thread_alloc_event_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); -- cgit v0.12 From 5e500523a056d7330e2223627ecdfb565d88e070 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Jan 2020 20:29:05 -0800 Subject: Remove thread_event_boot(). 
--- include/jemalloc/internal/thread_event.h | 1 - src/jemalloc.c | 1 - src/thread_event.c | 36 ++++++++++++-------------------- 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 33cbcbe..383af30 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -34,7 +34,6 @@ void thread_event_assert_invariants_debug(tsd_t *tsd); void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd, bool alloc_event); -void thread_event_boot(); void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 60565df..e4ef7f3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1580,7 +1580,6 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - thread_event_boot(); arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; diff --git a/src/thread_event.c b/src/thread_event.c index 5bdc4ae..0fbdebe 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,13 +4,6 @@ #include "jemalloc/internal/thread_event.h" -/* - * There's no lock for thread_event_active because write is only done in - * malloc_init(), where init_lock there serves as the guard, and ever since - * then thread_event_active becomes read only. - */ -static bool thread_event_active = false; - /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ static void tsd_thread_##event##_event_init(tsd_t *tsd); @@ -114,14 +107,23 @@ thread_stats_interval_event_handler(tsd_t *tsd) { } /* Per event facilities done. */ +static bool +event_ctx_has_active_events(event_ctx_t *ctx) { + assert(config_debug); +#define E(event, condition, alloc_event) \ + if (condition && alloc_event == ctx->is_alloc) { \ + return true; \ + } + ITERATE_OVER_ALL_EVENTS +#undef E + return false; +} + static uint64_t thread_next_event_compute(tsd_t *tsd, bool is_alloc) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; - bool no_event_on = true; - #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ - no_event_on = false; \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ @@ -132,8 +134,6 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - - assert(no_event_on == !thread_event_active); assert(wait <= THREAD_EVENT_MAX_START_WAIT); return wait; } @@ -166,7 +166,7 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. 
*/ - assert((!thread_event_active && last_event == 0U) || + assert((!event_ctx_has_active_events(ctx) && last_event == 0U) || interval == min_wait || (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } @@ -413,16 +413,6 @@ thread_event_update(tsd_t *tsd, bool is_alloc) { } } -void thread_event_boot() { -#define E(event, condition, ignored) \ - if (condition) { \ - thread_event_active = true; \ - } - - ITERATE_OVER_ALL_EVENTS -#undef E -} - void tsd_thread_event_init(tsd_t *tsd) { #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ -- cgit v0.12 From e8965226168cdcb359f6db39fdf4c216b47a60cf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2020 16:31:45 -0800 Subject: Abbreviate thread-event to te. --- include/jemalloc/internal/thread_event.h | 126 +++++++++--------- include/jemalloc/internal/tsd.h | 18 +-- src/jemalloc.c | 17 ++- src/prof.c | 5 +- src/thread_event.c | 217 +++++++++++++++---------------- src/tsd.c | 10 +- test/unit/thread_event.c | 32 ++--- 7 files changed, 201 insertions(+), 224 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 383af30..d528c05 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -3,39 +3,40 @@ #include "jemalloc/internal/tsd.h" +/* "te" is short for "thread_event" */ + /* * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) */ -#define THREAD_NEXT_EVENT_FAST_MAX \ - (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) +#define TE_NEXT_EVENT_FAST_MAX (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) /* * The max interval helps make sure that malloc stays on the fast path in the * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When - * thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX + * thread_allocated is within an event's distance to TE_NEXT_EVENT_FAST_MAX * above, thread_allocated_next_event_fast is wrapped around and we fall back to * the medium-fast path. The max interval makes sure that we're not staying on * the fallback case for too long, even if there's no active event or if all * active events have long wait times. */ -#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) +#define TE_MAX_INTERVAL ((uint64_t)(4U << 20)) -typedef struct event_ctx_s { +typedef struct te_ctx_s { bool is_alloc; uint64_t *current; uint64_t *last_event; uint64_t *next_event; uint64_t *next_event_fast; -} event_ctx_t; +} te_ctx_t; -void thread_event_assert_invariants_debug(tsd_t *tsd); -void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); -void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); -void thread_event_update(tsd_t *tsd, bool alloc_event); -void thread_event_recompute_fast_threshold(tsd_t *tsd); -void tsd_thread_event_init(tsd_t *tsd); +void te_assert_invariants_debug(tsd_t *tsd); +void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); +void te_alloc_rollback(tsd_t *tsd, size_t diff); +void te_event_update(tsd_t *tsd, bool alloc_event); +void te_recompute_fast_threshold(tsd_t *tsd); +void tsd_te_init(tsd_t *tsd); /* * List of all events, in the following format: @@ -97,21 +98,16 @@ ITERATE_OVER_ALL_COUNTERS * * Note that these can only be used on the fastpath. 
*/ -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_malloc_fastpath(tsd_t *tsd) { - return *tsd_thread_allocatedp_get_unsafe(tsd); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { - uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); - return v; +JEMALLOC_ALWAYS_INLINE void +te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) { + *allocated = *tsd_thread_allocatedp_get_unsafe(tsd); + *threshold = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE void -thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, - uint64_t *threshold, bool size_hint) { +te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold, + bool size_hint) { if (!size_hint) { *deallocated = tsd_thread_deallocated_get(tsd); *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); @@ -121,50 +117,50 @@ thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, *threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); } - assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE bool -event_ctx_is_alloc(event_ctx_t *ctx) { +te_ctx_is_alloc(te_ctx_t *ctx) { return ctx->is_alloc; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_current_bytes_get(event_ctx_t *ctx) { +te_ctx_current_bytes_get(te_ctx_t *ctx) { return *ctx->current; } JEMALLOC_ALWAYS_INLINE void -event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_current_bytes_set(te_ctx_t *ctx, uint64_t v) { *ctx->current = v; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_last_event_get(event_ctx_t *ctx) { +te_ctx_last_event_get(te_ctx_t *ctx) { return *ctx->last_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_last_event_set(te_ctx_t *ctx, uint64_t v) { *ctx->last_event = v; } /* Below 3 for next_event_fast. */ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_fast_get(event_ctx_t *ctx) { +te_ctx_next_event_fast_get(te_ctx_t *ctx) { uint64_t v = *ctx->next_event_fast; - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); + assert(v <= TE_NEXT_EVENT_FAST_MAX); return v; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) { - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); +te_ctx_next_event_fast_set(te_ctx_t *ctx, uint64_t v) { + assert(v <= TE_NEXT_EVENT_FAST_MAX); *ctx->next_event_fast = v; } JEMALLOC_ALWAYS_INLINE void -thread_next_event_fast_set_non_nominal(tsd_t *tsd) { +te_next_event_fast_set_non_nominal(tsd_t *tsd) { /* * Set the fast thresholds to zero when tsd is non-nominal. Use the * unsafe getter as this may get called during tsd init and clean up. @@ -175,14 +171,14 @@ thread_next_event_fast_set_non_nominal(tsd_t *tsd) { /* For next_event. Setter also updates the fast threshold. 
*/ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_get(event_ctx_t *ctx) { +te_ctx_next_event_get(te_ctx_t *ctx) { return *ctx->next_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { +te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { *ctx->next_event = v; - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } /* @@ -190,22 +186,22 @@ event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. * The invariants are only temporarily violated in the middle of: - * (a) thread_event() if an event is triggered (the thread_event_trigger() call + * (a) event_advance() if an event is triggered (the te_event_trigger() call * at the end will restore the invariants), - * (b) thread_##event##_event_update() (the thread_event_update() call at the + * (b) te_##event##_event_update() (the te_event_update() call at the * end will restore the invariants), or - * (c) thread_alloc_event_rollback() if the rollback falls below the last_event - * (the thread_event_update() call at the end will restore the invariants). + * (c) te_alloc_rollback() if the rollback falls below the last_event + * (the te_event_update() call at the end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void -thread_event_assert_invariants(tsd_t *tsd) { +te_assert_invariants(tsd_t *tsd) { if (config_debug) { - thread_event_assert_invariants_debug(tsd); + te_assert_invariants_debug(tsd); } } JEMALLOC_ALWAYS_INLINE void -event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { +te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { ctx->is_alloc = is_alloc; if (is_alloc) { ctx->current = tsd_thread_allocatedp_get(tsd); @@ -223,51 +219,51 @@ event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { } JEMALLOC_ALWAYS_INLINE void -thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { - thread_event_assert_invariants(tsd); +te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { + te_assert_invariants(tsd); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t bytes_before = event_ctx_current_bytes_get(&ctx); - event_ctx_current_bytes_set(&ctx, bytes_before + usize); + uint64_t bytes_before = te_ctx_current_bytes_get(&ctx); + te_ctx_current_bytes_set(&ctx, bytes_before + usize); /* The subtraction is intentionally susceptible to underflow. 
*/ - if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) { - thread_event_assert_invariants(tsd); + if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) { + te_assert_invariants(tsd); } else { - thread_event_trigger(tsd, &ctx, false); + te_event_trigger(tsd, &ctx, false); } } JEMALLOC_ALWAYS_INLINE void thread_dalloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, false); + te_event_advance(tsd, usize, false); } JEMALLOC_ALWAYS_INLINE void thread_alloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, true); + te_event_advance(tsd, usize, true); } #define E(event, condition, is_alloc) \ JEMALLOC_ALWAYS_INLINE void \ -thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - thread_event_assert_invariants(tsd); \ +te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + te_assert_invariants(tsd); \ assert(condition); \ assert(tsd_nominal(tsd)); \ assert(tsd_reentrancy_level_get(tsd) == 0); \ assert(event_wait > 0U); \ - if (THREAD_EVENT_MIN_START_WAIT > 1U && \ - unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \ - event_wait = THREAD_EVENT_MIN_START_WAIT; \ + if (TE_MIN_START_WAIT > 1U && \ + unlikely(event_wait < TE_MIN_START_WAIT)) { \ + event_wait = TE_MIN_START_WAIT; \ } \ - if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \ - event_wait = THREAD_EVENT_MAX_START_WAIT; \ + if (TE_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > TE_MAX_START_WAIT)) { \ + event_wait = TE_MAX_START_WAIT; \ } \ event##_event_wait_set(tsd, event_wait); \ - thread_event_update(tsd, is_alloc); \ + te_event_update(tsd, is_alloc); \ } ITERATE_OVER_ALL_EVENTS diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6868ce4..163ffc4 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -104,10 +104,10 @@ typedef void (*test_callback_t)(int *); MALLOC_TEST_TSD /* - * THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize. + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. 
*/ -#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U) -#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX #define TSD_INITIALIZER { \ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ @@ -121,14 +121,14 @@ typedef void (*test_callback_t)(int *); /* thread_deallocated_next_event_fast */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated_next_event */ TE_MIN_START_WAIT, \ + /* tcache_gc_event_wait */ TE_MIN_START_WAIT, \ + /* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \ + /* prof_sample_event_wait */ TE_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* stats_interval_event_wait */ TE_MIN_START_WAIT, \ /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index e4ef7f3..190b3a2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2180,7 +2180,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } @@ -2190,7 +2190,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } @@ -2386,15 +2386,14 @@ je_malloc(size_t size) { * it's not always needed in the core allocation logic. */ size_t usize; - sz_size2index_usize_fastpath(size, &ind, &usize); /* Fast path relies on size being a bin. 
*/ assert(ind < SC_NBINS); assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && (size <= SC_SMALL_MAXCLASS)); - uint64_t allocated = thread_allocated_malloc_fastpath(tsd); - uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + uint64_t allocated, threshold; + te_malloc_fastpath_ctx(tsd, &allocated, &threshold); uint64_t allocated_after = allocated + usize; /* * The ind and usize might be uninitialized (or partially) before @@ -2729,7 +2728,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { szind = sz_size2index_lookup(size); } uint64_t deallocated, threshold; - thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); + te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); size_t usize = sz_index2size(szind); uint64_t deallocated_after = deallocated + usize; @@ -3161,7 +3160,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } else { @@ -3362,7 +3361,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, extra, alignment, zero); } if (usize <= usize_max) { - thread_alloc_event_rollback(tsd, usize_max - usize); + te_alloc_rollback(tsd, usize_max - usize); } else { /* * For downsizing request, usize_max can be less than usize. @@ -3460,7 +3459,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); if (unlikely(usize == old_usize)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_not_resized; } thread_dalloc_event(tsd, old_usize); diff --git a/src/prof.c b/src/prof.c index 0d29c68..248532e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -444,8 +444,7 @@ prof_sample_threshold_update(tsd_t *tsd) { } if (lg_prof_sample == 0) { - thread_prof_sample_event_update(tsd, - THREAD_EVENT_MIN_START_WAIT); + te_prof_sample_event_update(tsd, TE_MIN_START_WAIT); return; } @@ -472,7 +471,7 @@ prof_sample_threshold_update(tsd_t *tsd) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - thread_prof_sample_event_update(tsd, bytes_until_sample); + te_prof_sample_event_update(tsd, bytes_until_sample); #endif } diff --git a/src/thread_event.c b/src/thread_event.c index 0fbdebe..dadace3 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -6,46 +6,45 @@ /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void tsd_thread_##event##_event_init(tsd_t *tsd); +static void te_tsd_##event##_event_init(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* Event handler function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void thread_##event##_event_handler(tsd_t *tsd); +static void te_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* (Re)Init functions. 
*/ static void -tsd_thread_tcache_gc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_prof_sample_event_init(tsd_t *tsd) { +te_tsd_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); prof_sample_threshold_update(tsd); } static void -tsd_thread_stats_interval_event_init(tsd_t *tsd) { +te_tsd_stats_interval_event_init(tsd_t *tsd) { assert(opt_stats_interval >= 0); uint64_t interval = stats_interval_accum_batch_size(); - thread_stats_interval_event_update(tsd, interval); + te_stats_interval_event_update(tsd, interval); } /* Handler functions. */ - static void tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -56,21 +55,21 @@ tcache_gc_event(tsd_t *tsd) { } static void -thread_tcache_gc_event_handler(tsd_t *tsd) { +te_tcache_gc_event_handler(tsd_t *tsd) { assert(tcache_gc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_event_init(tsd); + te_tsd_tcache_gc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) { +te_tcache_gc_dalloc_event_handler(tsd_t *tsd) { assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_dalloc_event_init(tsd); + te_tsd_tcache_gc_dalloc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_prof_sample_event_handler(tsd_t *tsd) { +te_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); assert(prof_sample_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -87,13 +86,13 @@ thread_prof_sample_event_handler(tsd_t *tsd) { * prof_active is turned on later, the counting for sampling * can immediately resume as normal. */ - thread_prof_sample_event_update(tsd, + te_prof_sample_event_update(tsd, (uint64_t)(1 << lg_prof_sample)); } } static void -thread_stats_interval_event_handler(tsd_t *tsd) { +te_stats_interval_event_handler(tsd_t *tsd) { assert(opt_stats_interval >= 0); assert(stats_interval_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -103,12 +102,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) { if (stats_interval_accum(tsd, last_event - last_stats_event)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } - tsd_thread_stats_interval_event_init(tsd); + te_tsd_stats_interval_event_init(tsd); } /* Per event facilities done. 
*/ static bool -event_ctx_has_active_events(event_ctx_t *ctx) { +te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); #define E(event, condition, alloc_event) \ if (condition && alloc_event == ctx->is_alloc) { \ @@ -120,13 +119,13 @@ event_ctx_has_active_events(event_ctx_t *ctx) { } static uint64_t -thread_next_event_compute(tsd_t *tsd, bool is_alloc) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_next_event_compute(tsd_t *tsd, bool is_alloc) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U && event_wait < wait) { \ wait = event_wait; \ } \ @@ -134,20 +133,19 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } static void -thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { - uint64_t current_bytes = event_ctx_current_bytes_get(ctx); - uint64_t last_event = event_ctx_last_event_get(ctx); - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx); +te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { + uint64_t current_bytes = te_ctx_current_bytes_get(ctx); + uint64_t last_event = te_ctx_last_event_get(ctx); + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx); assert(last_event != next_event); - if (next_event > THREAD_NEXT_EVENT_FAST_MAX || - !tsd_fast(tsd)) { + if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) { assert(next_event_fast == 0U); } else { assert(next_event_fast == next_event); @@ -158,27 +156,26 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { /* The subtraction is intentionally susceptible to underflow. */ assert(current_bytes - last_event < interval); - uint64_t min_wait = thread_next_event_compute(tsd, - event_ctx_is_alloc(ctx)); + uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is * on and the TSD is just initialized. The last_event == 0U guard * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!event_ctx_has_active_events(ctx) && last_event == 0U) || + assert((!te_ctx_has_active_events(ctx) && last_event == 0U) || interval == min_wait || - (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); + (interval < min_wait && interval == TE_MAX_INTERVAL)); } void -thread_event_assert_invariants_debug(tsd_t *tsd) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - thread_event_assert_invariants_impl(tsd, &ctx); +te_assert_invariants_debug(tsd_t *tsd) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_assert_invariants_impl(tsd, &ctx); - event_ctx_get(tsd, &ctx, false); - thread_event_assert_invariants_impl(tsd, &ctx); + te_ctx_get(tsd, &ctx, false); + te_assert_invariants_impl(tsd, &ctx); } /* @@ -229,66 +226,65 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { * it down the slow path earlier). */ static void -event_ctx_next_event_fast_update(event_ctx_t *ctx) { - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = (next_event <= - THREAD_NEXT_EVENT_FAST_MAX) ? 
next_event : 0U; - event_ctx_next_event_fast_set(ctx, next_event_fast); +te_ctx_next_event_fast_update(te_ctx_t *ctx) { + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ? + next_event : 0U; + te_ctx_next_event_fast_set(ctx, next_event_fast); } void -thread_event_recompute_fast_threshold(tsd_t *tsd) { +te_recompute_fast_threshold(tsd_t *tsd) { if (tsd_state_get(tsd) != tsd_state_nominal) { /* Check first because this is also called on purgatory. */ - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); return; } - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - event_ctx_next_event_fast_update(&ctx); - event_ctx_get(tsd, &ctx, false); - event_ctx_next_event_fast_update(&ctx); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_ctx_next_event_fast_update(&ctx); + te_ctx_get(tsd, &ctx, false); + te_ctx_next_event_fast_update(&ctx); atomic_fence(ATOMIC_SEQ_CST); if (tsd_state_get(tsd) != tsd_state_nominal) { - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); } } static void -thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx, +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { - assert(wait <= THREAD_EVENT_MAX_START_WAIT); - uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <= - THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); - event_ctx_next_event_set(tsd, ctx, next_event); + assert(wait <= TE_MAX_START_WAIT); + uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= + TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); + te_ctx_next_event_set(tsd, ctx, next_event); } static uint64_t -thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, - bool is_alloc, bool allow_event_trigger) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, + bool allow_event_trigger) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ } else { \ event_wait = 0U; \ if (!allow_event_trigger) { \ - event_wait = \ - THREAD_EVENT_MIN_START_WAIT; \ + event_wait = TE_MIN_START_WAIT; \ } \ } \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ /* \ * If there is a single event, then the remaining wait \ * time may become zero, and we rely on either the \ - * event handler or a thread_event_update() call later \ + * event handler or a te_event_update() call later \ * to properly set next_event; if there are multiple \ * events, then here we can get the minimum remaining \ * wait time to the next already set event. \ @@ -301,72 +297,64 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } void -thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) { +te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { /* usize has already been added to thread_allocated. 
*/ - uint64_t bytes_after = event_ctx_current_bytes_get(ctx); - + uint64_t bytes_after = te_ctx_current_bytes_get(ctx); /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx); - - /* Make sure that accumbytes cannot overflow uint64_t. */ - assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); + uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); - event_ctx_last_event_set(ctx, bytes_after); + te_ctx_last_event_set(ctx, bytes_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; bool is_alloc = ctx->is_alloc; - uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, - is_alloc, allow_event_trigger); - thread_event_adjust_thresholds_helper(tsd, ctx, wait); - - thread_event_assert_invariants(tsd); + uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc, + allow_event_trigger); + te_adjust_thresholds_helper(tsd, ctx, wait); + te_assert_invariants(tsd); #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition && \ event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ - thread_##event##_event_handler(tsd); \ + te_##event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } void -thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { - thread_event_assert_invariants(tsd); - +te_alloc_rollback(tsd_t *tsd, size_t diff) { + te_assert_invariants(tsd); if (diff == 0U) { return; } /* Rollback happens only on alloc events. */ - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx); + uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx); /* The subtraction is intentionally susceptible to underflow. */ uint64_t thread_allocated_rollback = thread_allocated - diff; - event_ctx_current_bytes_set(&ctx, thread_allocated_rollback); + te_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ if (thread_allocated_rollback - last_event <= thread_allocated - last_event) { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); return; } - event_ctx_last_event_set(&ctx, thread_allocated_rollback); - + te_ctx_last_event_set(&ctx, thread_allocated_rollback); /* The subtraction is intentionally susceptible to underflow. 
*/ uint64_t wait_diff = last_event - thread_allocated_rollback; assert(wait_diff <= diff); @@ -374,49 +362,48 @@ thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { #define E(event, condition, alloc_event) \ if (alloc_event == true && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U) { \ - if (wait_diff > \ - THREAD_EVENT_MAX_START_WAIT - event_wait) { \ - event_wait = \ - THREAD_EVENT_MAX_START_WAIT; \ + if (wait_diff > TE_MAX_START_WAIT - event_wait) {\ + event_wait = TE_MAX_START_WAIT; \ } else { \ event_wait += wait_diff; \ } \ - assert(event_wait <= \ - THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ } \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_update(tsd, true); + te_event_update(tsd, true); } void -thread_event_update(tsd_t *tsd, bool is_alloc) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); +te_event_update(tsd_t *tsd, bool is_alloc) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t wait = thread_next_event_compute(tsd, is_alloc); - thread_event_adjust_thresholds_helper(tsd, &ctx, wait); + uint64_t wait = te_next_event_compute(tsd, is_alloc); + te_adjust_thresholds_helper(tsd, &ctx, wait); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ - if (event_ctx_current_bytes_get(&ctx) - last_event >= - event_ctx_next_event_get(&ctx) - last_event) { - thread_event_trigger(tsd, &ctx, true); + if (te_ctx_current_bytes_get(&ctx) - last_event >= + te_ctx_next_event_get(&ctx) - last_event) { + te_event_trigger(tsd, &ctx, true); } else { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } } -void tsd_thread_event_init(tsd_t *tsd) { +void tsd_te_init(tsd_t *tsd) { + /* Make sure no overflow for the bytes accumulated on event_trigger. */ + assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); + #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ - tsd_thread_##event##_event_init(tsd); \ + te_tsd_##event##_event_init(tsd); \ } ITERATE_OVER_ALL_EVENTS diff --git a/src/tsd.c b/src/tsd.c index 54e5b4a..38196c8 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -117,9 +117,9 @@ tsd_force_recompute(tsdn_t *tsdn) { <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, ATOMIC_RELAXED); - /* See comments in thread_event_recompute_fast_threshold(). */ + /* See comments in te_recompute_fast_threshold(). */ atomic_fence(ATOMIC_SEQ_CST); - thread_next_event_fast_set_non_nominal(remote_tsd); + te_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } @@ -179,7 +179,7 @@ tsd_slow_update(tsd_t *tsd) { ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } void @@ -218,7 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { tsd_slow_update(tsd); } } - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } static bool @@ -240,7 +240,7 @@ tsd_data_init(tsd_t *tsd) { (uint64_t)(uintptr_t)tsd; /* event_init may use the prng state above. 
*/ - tsd_thread_event_init(tsd); + tsd_te_init(tsd); return tsd_tcache_enabled_data_init(tsd); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index db2d637..0855829 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -2,17 +2,15 @@ TEST_BEGIN(test_next_event_fast_roll_back) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - THREAD_NEXT_EVENT_FAST_MAX - 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ - event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\ + event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -25,18 +23,16 @@ TEST_END TEST_BEGIN(test_next_event_fast_resume) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - THREAD_NEXT_EVENT_FAST_MAX + 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX + 16U); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ event##_event_wait_set(tsd, \ - THREAD_NEXT_EVENT_FAST_MAX + 16U); \ + TE_NEXT_EVENT_FAST_MAX + 16U); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -48,11 +44,11 @@ TEST_END TEST_BEGIN(test_event_rollback) { tsd_t *tsd = tsd_fetch(); - const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2; + const uint64_t diff = TE_MAX_INTERVAL >> 2; size_t count = 10; uint64_t thread_allocated = thread_allocated_get(tsd); while (count-- != 0) { - thread_alloc_event_rollback(tsd, diff); + te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); -- cgit v0.12 From c6bfe55857230949ea2d6467c1dc3fce213fe9c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 3 Feb 2020 23:59:31 -0800 Subject: Update the tsd description. 
--- include/jemalloc/internal/tsd.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 163ffc4..d88f3d1 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -17,21 +17,27 @@ * Thread-Specific-Data layout * --- data accessed on tcache fast path: state, rtree_ctx, stats --- * s: state - * e: tcache_enabled * m: thread_allocated * k: thread_allocated_next_event_fast * f: thread_deallocated + * h: thread_deallocated_next_event_fast * c: rtree_ctx (rtree cache accessed on deallocation) * t: tcache * --- data not accessed on tcache fast path: arena-related fields --- + * e: tcache_enabled * d: arenas_tdata_bypass * r: reentrancy_level - * x: narenas_tdata + * n: narenas_tdata * l: thread_allocated_last_event * j: thread_allocated_next_event + * q: thread_deallocated_last_event + * u: thread_deallocated_next_event * g: tcache_gc_event_wait + * y: tcache_gc_dalloc_event_wait * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) + * z: stats_interval_event_wait + * e: stats_interval_last_event * p: prof_tdata (config_prof) * v: prng_state * i: iarena @@ -43,15 +49,15 @@ * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx mmmmmmmm kkkkkkkk ffffffff [c * 32 ........ ........ .......] | + * | sedrnnnn mmmmmmmm kkkkkkkk ffffffff hhhhhhhh [c * 24 ........ ........]| * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * | [c * 64 ........ ........ ........ ........ ........ ........ ........]| * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj gggggggg wwwwwwww | + * | [c * 40 ........ ........ ........ .......] llllllll jjjjjjjj qqqqqqqq | * +---------------------------- 4th cacheline ----------------------------+ - * | xxxxxxxx pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ | - * +---------------------------- 5th cacheline ----------------------------+ - * | ........ ........ ..b][t.. ........ ........ ........ ........ ........ | + * | uuuuuuuu gggggggg yyyyyyyy wwwwwwww xxxxxxxx zzzzzzzz eeeeeeee pppppppp | + * +---------------------------- 5th and after ----------------------------+ + * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b * 40; then embedded tcache ..... | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. 
* -- cgit v0.12 From bdc08b51581d422189e32ee87724e668f0fa5ef2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 23 Jan 2020 15:00:01 -0800 Subject: Better naming buffered writer --- include/jemalloc/internal/buf_writer.h | 20 ++++++++++---------- src/buf_writer.c | 30 +++++++++++++++--------------- src/jemalloc.c | 8 ++++---- src/prof_log.c | 8 ++++---- src/prof_recent.c | 8 ++++---- test/unit/buf_writer.c | 15 ++++++++------- 6 files changed, 45 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 60bd010..b2644a8 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -16,21 +16,21 @@ typedef struct { char *buf; size_t buf_size; size_t buf_end; -} buf_write_arg_t; +} buf_writer_t; JEMALLOC_ALWAYS_INLINE void -buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), - void *cbopaque, char *buf, size_t buf_len) { - arg->write_cb = write_cb; - arg->cbopaque = cbopaque; +buf_writer_init(buf_writer_t *buf_writer, void (*write_cb)(void *, + const char *), void *cbopaque, char *buf, size_t buf_len) { + buf_writer->write_cb = write_cb; + buf_writer->cbopaque = cbopaque; assert(buf != NULL); - arg->buf = buf; + buf_writer->buf = buf; assert(buf_len >= 2); - arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. */ - arg->buf_end = 0; + buf_writer->buf_size = buf_len - 1; /* Allowing for '\0' at the end. */ + buf_writer->buf_end = 0; } -void buf_write_flush(buf_write_arg_t *arg); -void buf_write_cb(void *buf_write_arg, const char *s); +void buf_writer_flush(buf_writer_t *buf_writer); +void buf_writer_cb(void *buf_writer_arg, const char *s); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index 4106594..aed7d4a 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -6,31 +6,31 @@ #include "jemalloc/internal/malloc_io.h" void -buf_write_flush(buf_write_arg_t *arg) { - assert(arg->buf_end <= arg->buf_size); - arg->buf[arg->buf_end] = '\0'; - if (arg->write_cb == NULL) { - arg->write_cb = je_malloc_message != NULL ? +buf_writer_flush(buf_writer_t *buf_writer) { + assert(buf_writer->buf_end <= buf_writer->buf_size); + buf_writer->buf[buf_writer->buf_end] = '\0'; + if (buf_writer->write_cb == NULL) { + buf_writer->write_cb = je_malloc_message != NULL ? je_malloc_message : wrtmessage; } - arg->write_cb(arg->cbopaque, arg->buf); - arg->buf_end = 0; + buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); + buf_writer->buf_end = 0; } void -buf_write_cb(void *buf_write_arg, const char *s) { - buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; +buf_writer_cb(void *buf_writer_arg, const char *s) { + buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; size_t i, slen, n, s_remain, buf_remain; - assert(arg->buf_end <= arg->buf_size); + assert(buf_writer->buf_end <= buf_writer->buf_size); for (i = 0, slen = strlen(s); i < slen; i += n) { - if (arg->buf_end == arg->buf_size) { - buf_write_flush(arg); + if (buf_writer->buf_end == buf_writer->buf_size) { + buf_writer_flush(buf_writer); } s_remain = slen - i; - buf_remain = arg->buf_size - arg->buf_end; + buf_remain = buf_writer->buf_size - buf_writer->buf_end; n = s_remain < buf_remain ? 
s_remain : buf_remain; - memcpy(arg->buf + arg->buf_end, s + i, n); - arg->buf_end += n; + memcpy(buf_writer->buf + buf_writer->buf_end, s + i, n); + buf_writer->buf_end += n; } assert(i == slen); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 190b3a2..35c490b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3746,11 +3746,11 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (buf == NULL) { stats_print(write_cb, cbopaque, opts); } else { - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, + buf_writer_t buf_writer; + buf_writer_init(&buf_writer, write_cb, cbopaque, buf, STATS_PRINT_BUFSIZE); - stats_print(buf_write_cb, &buf_arg, opts); - buf_write_flush(&buf_arg); + stats_print(buf_writer_cb, &buf_writer, opts); + buf_writer_flush(&buf_writer); idalloctm(tsdn, buf, NULL, NULL, true, true); } } diff --git a/src/prof_log.c b/src/prof_log.c index a04c8e4..95cf246 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -632,15 +632,15 @@ prof_log_stop(tsdn_t *tsdn) { char *buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t buf_arg; + buf_writer_t buf_writer; if (buf == NULL) { emitter_init(&emitter, emitter_output_json_compact, prof_emitter_write_cb, &arg); } else { - buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + buf_writer_init(&buf_writer, prof_emitter_write_cb, &arg, buf, PROF_LOG_STOP_BUFSIZE); emitter_init(&emitter, emitter_output_json_compact, - buf_write_cb, &buf_arg); + buf_writer_cb, &buf_writer); } emitter_begin(&emitter); @@ -651,7 +651,7 @@ prof_log_stop(tsdn_t *tsdn) { emitter_end(&emitter); if (buf != NULL) { - buf_write_flush(&buf_arg); + buf_writer_flush(&buf_writer); idalloctm(tsdn, buf, NULL, NULL, true, true); } diff --git a/src/prof_recent.c b/src/prof_recent.c index 66a9b40..dde029c 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -466,15 +466,15 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); emitter_t emitter; - buf_write_arg_t buf_arg; + buf_writer_t buf_writer; if (buf == NULL) { emitter_init(&emitter, emitter_output_json_compact, write_cb, cbopaque); } else { - buf_write_init(&buf_arg, write_cb, cbopaque, buf, + buf_writer_init(&buf_writer, write_cb, cbopaque, buf, PROF_RECENT_PRINT_BUFSIZE); emitter_init(&emitter, emitter_output_json_compact, - buf_write_cb, &buf_arg); + buf_writer_cb, &buf_writer); } emitter_begin(&emitter); @@ -536,7 +536,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), emitter_end(&emitter); if (buf != NULL) { - buf_write_flush(&buf_arg); + buf_writer_flush(&buf_writer); idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); } } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index bbdb657..63fd0c6 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -22,8 +22,9 @@ TEST_BEGIN(test_buf_write) { size_t n_unit, remain, i; ssize_t unit; uint64_t arg = 4; /* Starting value of random argument. 
*/ - buf_write_arg_t test_buf_arg = {test_write_cb, &arg, test_buf, - TEST_BUF_SIZE - 1, 0}; + buf_writer_t buf_writer; + buf_writer_init(&buf_writer, test_write_cb, &arg, test_buf, + TEST_BUF_SIZE); memset(s, 'a', UNIT_MAX); arg_store = arg; @@ -35,23 +36,23 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buf_write_cb(&test_buf_arg, s); + buf_writer_cb(&buf_writer, s); remain += unit; - if (remain > test_buf_arg.buf_size) { + if (remain > buf_writer.buf_size) { /* Flushes should have happened. */ assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); - remain %= test_buf_arg.buf_size; + remain %= buf_writer.buf_size; if (remain == 0) { /* Last flush should be lazy. */ - remain += test_buf_arg.buf_size; + remain += buf_writer.buf_size; } } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } - buf_write_flush(&test_buf_arg); + buf_writer_flush(&buf_writer); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); -- cgit v0.12 From 9cac3fa8f588c828a0a94bdc911383d2952b40e0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 3 Feb 2020 15:56:13 -0800 Subject: Encapsulate buffer allocation in buffered writer --- include/jemalloc/internal/buf_writer.h | 27 ++++---- src/buf_writer.c | 110 +++++++++++++++++++++++++++++++-- src/jemalloc.c | 19 ++---- src/prof_log.c | 22 ++----- src/prof_recent.c | 24 +++---- test/unit/buf_writer.c | 85 ++++++++++++++++++++++--- 6 files changed, 211 insertions(+), 76 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index b2644a8..c1e2a82 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -10,27 +10,24 @@ * some "option like" content for the write_cb, so it doesn't matter. */ +typedef void (write_cb_t)(void *, const char *); + typedef struct { - void (*write_cb)(void *, const char *); - void *cbopaque; + write_cb_t *public_write_cb; + void *public_cbopaque; + write_cb_t *private_write_cb; + void *private_cbopaque; char *buf; size_t buf_size; size_t buf_end; + bool internal_buf; } buf_writer_t; -JEMALLOC_ALWAYS_INLINE void -buf_writer_init(buf_writer_t *buf_writer, void (*write_cb)(void *, - const char *), void *cbopaque, char *buf, size_t buf_len) { - buf_writer->write_cb = write_cb; - buf_writer->cbopaque = cbopaque; - assert(buf != NULL); - buf_writer->buf = buf; - assert(buf_len >= 2); - buf_writer->buf_size = buf_len - 1; /* Allowing for '\0' at the end. 
*/ - buf_writer->buf_end = 0; -} - +bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, + write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); +write_cb_t *buf_writer_get_write_cb(buf_writer_t *buf_writer); +void *buf_writer_get_cbopaque(buf_writer_t *buf_writer); void buf_writer_flush(buf_writer_t *buf_writer); -void buf_writer_cb(void *buf_writer_arg, const char *s); +void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index aed7d4a..bb8763b 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -5,23 +5,114 @@ #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/malloc_io.h" +static void * +buf_writer_allocate_internal_buf(tsdn_t *tsdn, size_t buf_len) { +#ifdef JEMALLOC_JET + if (buf_len > SC_LARGE_MAXCLASS) { + return NULL; + } +#else + assert(buf_len <= SC_LARGE_MAXCLASS); +#endif + return iallocztm(tsdn, buf_len, sz_size2index(buf_len), false, NULL, + true, arena_get(tsdn, 0, false), true); +} + +static void +buf_writer_free_internal_buf(tsdn_t *tsdn, void *buf) { + if (buf != NULL) { + idalloctm(tsdn, buf, NULL, NULL, true, true); + } +} + +static write_cb_t buf_writer_cb; + +static void +buf_writer_assert(buf_writer_t *buf_writer) { + if (buf_writer->buf != NULL) { + assert(buf_writer->public_write_cb == buf_writer_cb); + assert(buf_writer->public_cbopaque == buf_writer); + assert(buf_writer->private_write_cb != buf_writer_cb); + assert(buf_writer->private_cbopaque != buf_writer); + assert(buf_writer->buf_size > 0); + } else { + assert(buf_writer->public_write_cb != buf_writer_cb); + assert(buf_writer->public_cbopaque != buf_writer); + assert(buf_writer->private_write_cb == NULL); + assert(buf_writer->private_cbopaque == NULL); + assert(buf_writer->buf_size == 0); + } +} + +bool +buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, + void *cbopaque, char *buf, size_t buf_len) { + assert(buf_len >= 2); + if (buf != NULL) { + buf_writer->buf = buf; + buf_writer->internal_buf = false; + } else { + buf_writer->buf = buf_writer_allocate_internal_buf(tsdn, + buf_len); + buf_writer->internal_buf = true; + } + buf_writer->buf_end = 0; + if (buf_writer->buf != NULL) { + buf_writer->public_write_cb = buf_writer_cb; + buf_writer->public_cbopaque = buf_writer; + buf_writer->private_write_cb = write_cb; + buf_writer->private_cbopaque = cbopaque; + buf_writer->buf_size = buf_len - 1; /* Allowing for '\0'. */ + buf_writer_assert(buf_writer); + return false; + } else { + buf_writer->public_write_cb = write_cb; + buf_writer->public_cbopaque = cbopaque; + buf_writer->private_write_cb = NULL; + buf_writer->private_cbopaque = NULL; + buf_writer->buf_size = 0; + buf_writer_assert(buf_writer); + return true; + } +} + +write_cb_t * +buf_writer_get_write_cb(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + return buf_writer->public_write_cb; +} + +void * +buf_writer_get_cbopaque(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + return buf_writer->public_cbopaque; +} + void buf_writer_flush(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + if (buf_writer->buf == NULL) { + return; + } assert(buf_writer->buf_end <= buf_writer->buf_size); buf_writer->buf[buf_writer->buf_end] = '\0'; - if (buf_writer->write_cb == NULL) { - buf_writer->write_cb = je_malloc_message != NULL ? + if (buf_writer->private_write_cb == NULL) { + buf_writer->private_write_cb = je_malloc_message != NULL ? 
je_malloc_message : wrtmessage; } - buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); + assert(buf_writer->private_write_cb != NULL); + buf_writer->private_write_cb(buf_writer->private_cbopaque, + buf_writer->buf); buf_writer->buf_end = 0; } -void +static void buf_writer_cb(void *buf_writer_arg, const char *s) { buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; - size_t i, slen, n, s_remain, buf_remain; + buf_writer_assert(buf_writer); + assert(buf_writer->buf != NULL); assert(buf_writer->buf_end <= buf_writer->buf_size); + size_t i, slen, n, s_remain, buf_remain; for (i = 0, slen = strlen(s); i < slen; i += n) { if (buf_writer->buf_end == buf_writer->buf_size) { buf_writer_flush(buf_writer); @@ -34,3 +125,12 @@ buf_writer_cb(void *buf_writer_arg, const char *s) { } assert(i == slen); } + +void +buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + buf_writer_flush(buf_writer); + if (buf_writer->internal_buf) { + buf_writer_free_internal_buf(tsdn, buf_writer->buf); + } +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 35c490b..ddb29e3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3740,19 +3740,12 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_debug) { stats_print(write_cb, cbopaque, opts); } else { - char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, - sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (buf == NULL) { - stats_print(write_cb, cbopaque, opts); - } else { - buf_writer_t buf_writer; - buf_writer_init(&buf_writer, write_cb, cbopaque, buf, - STATS_PRINT_BUFSIZE); - stats_print(buf_writer_cb, &buf_writer, opts); - buf_writer_flush(&buf_writer); - idalloctm(tsdn, buf, NULL, NULL, true, true); - } + buf_writer_t buf_writer; + buf_writer_init(tsdn, &buf_writer, write_cb, cbopaque, NULL, + STATS_PRINT_BUFSIZE); + stats_print(buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer), opts); + buf_writer_terminate(tsdn, &buf_writer); } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index 95cf246..c29fa35 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -629,19 +629,12 @@ prof_log_stop(tsdn_t *tsdn) { struct prof_emitter_cb_arg_s arg; arg.fd = fd; - char *buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, - sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); buf_writer_t buf_writer; - if (buf == NULL) { - emitter_init(&emitter, emitter_output_json_compact, - prof_emitter_write_cb, &arg); - } else { - buf_writer_init(&buf_writer, prof_emitter_write_cb, &arg, buf, - PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_cb, &buf_writer); - } + buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, + PROF_LOG_STOP_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer)); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -650,10 +643,7 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - if (buf != NULL) { - buf_writer_flush(&buf_writer); - idalloctm(tsdn, buf, NULL, NULL, true, true); - } + buf_writer_terminate(tsdn, &buf_writer); /* Reset global state. 
*/ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index dde029c..7a98cc5 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -462,20 +462,13 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), void *cbopaque) { - char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, - sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, - arena_get(tsd_tsdn(tsd), 0, false), true); - emitter_t emitter; buf_writer_t buf_writer; - if (buf == NULL) { - emitter_init(&emitter, emitter_output_json_compact, write_cb, - cbopaque); - } else { - buf_writer_init(&buf_writer, write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_cb, &buf_writer); - } + buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, + PROF_RECENT_PRINT_BUFSIZE); + emitter_t emitter; + emitter_init(&emitter, emitter_output_json_compact, + buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer)); emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -535,10 +528,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - if (buf != NULL) { - buf_writer_flush(&buf_writer); - idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); - } + buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); } #undef PROF_RECENT_PRINT_BUFSIZE diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 63fd0c6..5171d61 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -7,6 +7,7 @@ static size_t test_write_len; static char test_buf[TEST_BUF_SIZE]; +static uint64_t arg; static uint64_t arg_store; static void test_write_cb(void *cbopaque, const char *s) { @@ -17,16 +18,16 @@ static void test_write_cb(void *cbopaque, const char *s) { "Test write overflowed"); } -TEST_BEGIN(test_buf_write) { +static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - uint64_t arg = 4; /* Starting value of random argument. */ - buf_writer_t buf_writer; - buf_writer_init(&buf_writer, test_write_cb, &arg, test_buf, - TEST_BUF_SIZE); + assert_ptr_not_null(buf_writer->buf, "Buffer is null"); + write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); + void *cbopaque = buf_writer_get_cbopaque(buf_writer); memset(s, 'a', UNIT_MAX); + arg = 4; /* Starting value of random argument. */ arg_store = arg; for (unit = UNIT_MAX; unit >= 0; --unit) { /* unit keeps decreasing, so strlen(s) is always unit. */ @@ -36,32 +37,96 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buf_writer_cb(&buf_writer, s); + write_cb(cbopaque, s); remain += unit; - if (remain > buf_writer.buf_size) { + if (remain > buf_writer->buf_size) { /* Flushes should have happened. */ assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); - remain %= buf_writer.buf_size; + remain %= buf_writer->buf_size; if (remain == 0) { /* Last flush should be lazy. 
*/ - remain += buf_writer.buf_size; + remain += buf_writer->buf_size; } } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } + buf_writer_flush(buf_writer); + assert_zu_eq(test_write_len, n_unit * unit, + "Incorrect length after flushing at the end of" + " writing %zu strings of length %zu", n_unit, unit); + } + } + buf_writer_terminate(tsdn, buf_writer); +} + +TEST_BEGIN(test_buf_write_static) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + test_buf, TEST_BUF_SIZE), + "buf_writer_init() should not encounter error on static buffer"); + test_buf_writer_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_dynamic) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); + test_buf_writer_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_oom) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + assert_ptr_null(buf_writer.buf, "Buffer should be null"); + write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); + assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + void *cbopaque = buf_writer_get_cbopaque(&buf_writer); + assert_ptr_eq(cbopaque, &arg, "Should use arg"); + + char s[UNIT_MAX + 1]; + size_t n_unit, i; + ssize_t unit; + + memset(s, 'a', UNIT_MAX); + arg = 4; /* Starting value of random argument. */ + arg_store = arg; + for (unit = UNIT_MAX; unit >= 0; unit -= UNIT_MAX / 4) { + /* unit keeps decreasing, so strlen(s) is always unit. 
*/ + s[unit] = '\0'; + for (n_unit = 1; n_unit <= 3; ++n_unit) { + test_write_len = 0; + for (i = 1; i <= n_unit; ++i) { + arg = prng_lg_range_u64(&arg, 64); + write_cb(cbopaque, s); + assert_u64_eq(arg_store, arg, + "Call back argument didn't get through"); + assert_zu_eq(test_write_len, i * unit, + "Incorrect length after writing %zu strings" + " of length %zu", i, unit); + } buf_writer_flush(&buf_writer); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } } + buf_writer_terminate(tsdn, &buf_writer); } TEST_END int main(void) { - return test(test_buf_write); + return test( + test_buf_write_static, + test_buf_write_dynamic, + test_buf_write_oom); } -- cgit v0.12 From 2476889195e897912cc4b6a26bfeab1eee4c06df Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 15:00:37 -0800 Subject: Add inspect.c to MSVC filters --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 6 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 404adbe..9b0445f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -52,6 +52,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 404adbe..9b0445f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -52,6 +52,9 @@ Source Files + + Source Files + Source Files -- cgit v0.12 From 7014f81e172290466e1a28118b622519bbbed2b0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 16:36:02 -0800 Subject: Add ASSURED_WRITE in mallctl --- src/ctl.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 78f5df2..302cb9d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1496,6 +1496,14 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +#define ASSURED_WRITE(v, t) do { \ + if (newp == NULL || newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ +} while (0) + #define MIB_UNSIGNED(v, i) do { \ if (mib[i] > UINT_MAX) { \ ret = EFAULT; \ @@ -2048,12 +2056,7 @@ tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; WRITEONLY(); - tcache_ind = UINT_MAX; - WRITE(tcache_ind, unsigned); - if (tcache_ind == UINT_MAX) { - ret = EFAULT; - goto label_return; - } + ASSURED_WRITE(tcache_ind, unsigned); tcaches_flush(tsd, tcache_ind); ret = 0; @@ -2068,12 +2071,7 @@ tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; WRITEONLY(); - tcache_ind = UINT_MAX; - WRITE(tcache_ind, unsigned); - if (tcache_ind == UINT_MAX) { - ret = EFAULT; - goto label_return; - } + ASSURED_WRITE(tcache_ind, unsigned); tcaches_destroy(tsd, tcache_ind); ret = 0; -- cgit v0.12 From ca1f08225134981eb74083e5143be4a9d544ff1a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 30 Dec 2019 17:14:44 -0800 Subject: Disallow merge across mmap regions to preserve SN / first-fit. Check the is_head state before merging two extents. Disallow the merge if it's crossing two separate mmap regions. This enforces first-fit (by not losing the SN) at a very small cost. 
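
To make the resulting policy easier to follow, here is a minimal sketch (not
part of the patch): it condenses the decision into a standalone predicate,
with the parameters standing in for maps_coalesce / opt_retain and the head
flag of the higher extent, and it follows the convention of the hunk below,
where returning true means the merge is rejected.

    #include <stdbool.h>

    /*
     * Sketch only: returns true to reject merging two adjacent extents a
     * (lower) and b (higher); b_is_head marks b as the first extent of its
     * own mmap'd region.
     */
    static bool
    merge_rejected_sketch(bool maps_coalesce, bool retain, bool b_is_head) {
            if (!maps_coalesce && !retain) {
                    /* No coalescing support (Windows), no retain: never merge. */
                    return true;
            }
            if (retain && b_is_head) {
                    /*
                     * Crossing mmap regions under retain: reject, so the lower
                     * region keeps its SN and first-fit stays intact.
                     */
                    return true;
            }
            /* Otherwise allow the merge (DSS handling omitted here). */
            return false;
    }
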
--- src/ehooks.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 1e1cac9..5ea73e3 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -201,17 +201,6 @@ ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { static bool ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). - */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } /* If b is a head extent, disallow the cross-region merge. */ if (head_b) { /* @@ -230,10 +219,27 @@ ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { - if (ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, head_b)) { + assert(addr_a < addr_b); + /* + * For non-DSS cases (first 2 branches) -- + * a) W/o maps_coalesce, merge is not always allowed (Windows): + * 1) w/o retain, never merge (first branch below). + * 2) with retain, only merge extents from the same VirtualAlloc + * region (in which case MEM_DECOMMIT is utilized for purging). + * + * b) With maps_coalesce, it's always possible to merge. + * 1) w/o retain, always allow merge (only about dirty / muzzy). + * 2) with retain, to preserve the SN / first-fit, merge is still + * disallowed if b is a head extent, i.e. no merging across + * different mmap regions. + * + * a2) and b2) share the implementation (the no_merge_heads branch). + */ + if (!maps_coalesce && !opt_retain) { return true; } - if (!maps_coalesce && !opt_retain) { + if (opt_retain && ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, + head_b)) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { -- cgit v0.12 From 7fd22f7b2ea5ce2540563ece8e2d30a5316ac857 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 13 Feb 2020 14:49:32 +0100 Subject: Fix Undefined Behavior in hash.h hash.h:200:27, left shift of 250 by 24 places cannot be represented in type 'int' --- include/jemalloc/internal/hash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 9132b60..7f94567 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -182,19 +182,19 @@ hash_x86_128(const void *key, const int len, uint32_t seed, case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; JEMALLOC_FALLTHROUGH; - case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH; + case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH; case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH; case 9: k3 ^= tail[ 8] << 0; k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; JEMALLOC_FALLTHROUGH; - case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH; + case 8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH; case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH; + case 4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH; case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; case 2: k1 ^= 
tail[ 1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[ 0] << 0; -- cgit v0.12 From ba0e35411cc39d57abb830c80eebde054b06241c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Feb 2020 14:53:36 -0800 Subject: Rework the bin locking around tcache refill / flush. Previously, tcache fill/flush (as well as small alloc/dalloc on the arena) may potentially drop the bin lock for slab_alloc and slab_dalloc. This commit refactors the logic so that the slab calls happen in the same function / level as the bin lock / unlock. The main purpose is to be able to use flat combining without having to keep track of stack state. In the meantime, this change reduces the locking, especially for slab_dalloc calls, where nothing happens after the call. --- include/jemalloc/internal/arena_externs.h | 3 +- src/arena.c | 409 +++++++++++++++++------------- src/tcache.c | 39 +-- 3 files changed, 258 insertions(+), 193 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 608dda7..1b92766 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,8 +63,9 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, +bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *edata, void *ptr); +void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/src/arena.c b/src/arena.c index 9558bb4..2234894 100644 --- a/src/arena.c +++ b/src/arena.c @@ -60,8 +60,6 @@ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); @@ -996,7 +994,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } -static void +void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); @@ -1252,101 +1250,55 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard return slab; } -static edata_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { - edata_t *slab; - const bin_info_t *bin_info; - - /* Look for a usable slab. */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; - } - /* No existing slabs have any space available. */ - - bin_info = &bin_infos[binind]; - - /* Allocate a new slab. 
*/ - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info); - /********************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (slab != NULL) { - if (config_stats) { - bin->stats.nslabs++; - bin->stats.curslabs++; - } - return slab; - } - - /* - * arena_slab_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped bin->lock above, - * so search one more time. - */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; +/* + * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab + * variants (i.e. through slabcur and nonfull) must be tried first. + */ +static void +arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin, szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after slabcur and nonfull both failed. */ + assert(bin->slabcur == NULL); + assert(edata_heap_first(&bin->slabs_nonfull) == NULL); + assert(fresh_slab != NULL); + + /* A new slab from arena_slab_alloc() */ + assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); + if (config_stats) { + bin->stats.nslabs++; + bin->stats.curslabs++; } - - return NULL; + bin->slabcur = fresh_slab; } -/* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ +/* Refill slabcur and then alloc using the fresh slab */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { +arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind, + fresh_slab); - if (bin->slabcur != NULL) { - /* Only attempted when current slab is full. */ - assert(edata_nfree_get(bin->slabcur) == 0); - } + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} - const bin_info_t *bin_info = &bin_infos[binind]; - edata_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, - binshard); - if (bin->slabcur != NULL) { - if (edata_nfree_get(bin->slabcur) > 0) { - /* - * Another thread updated slabcur while this one ran - * without the bin lock in arena_bin_nonfull_slab_get(). - */ - void *ret = arena_slab_reg_alloc(bin->slabcur, - bin_info); - if (slab != NULL) { - /* - * arena_slab_alloc() may have allocated slab, - * or it may have been pulled from - * slabs_nonfull. Therefore it is unsafe to - * make any assumptions about how slab has - * previously been used, and - * arena_bin_lower_slab() must be called, as if - * a region were just deallocated from the slab. - */ - if (edata_nfree_get(slab) == bin_info->nregs) { - arena_dalloc_bin_slab(tsdn, arena, slab, - bin); - } else { - arena_bin_lower_slab(tsdn, arena, slab, - bin); - } - } - return ret; - } +static bool +arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after arena_slab_reg_alloc[_batch] failed. */ + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); + if (bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; } - if (slab == NULL) { - return NULL; - } - bin->slabcur = slab; - assert(edata_nfree_get(bin->slabcur) > 0); + /* Look for a usable slab. 
*/ + bin->slabcur = arena_bin_slabs_nonfull_tryget(bin); + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(slab, bin_info); + return (bin->slabcur == NULL); } /* Choose a bin shard and return the locked bin. */ @@ -1369,63 +1321,139 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - unsigned i, nfill, cnt; - assert(cache_bin_ncached_get(tbin, binind) == 0); tcache->bin_refilled[binind] = true; + const bin_info_t *bin_info = &bin_infos[binind]; + const unsigned nfill = cache_bin_ncached_max_get(binind) >> + tcache->lg_fill_div[binind]; + void **empty_position = cache_bin_empty_position_get(tbin, binind); + + /* + * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull + * slabs. After both are exhausted, new slabs will be allocated through + * arena_slab_alloc(). + * + * Bin lock is only taken / released right before / after the while(...) + * refill loop, with new slab allocation (which has its own locking) + * kept outside of the loop. This setup facilitates flat combining, at + * the cost of the nested loop (through goto label_refill). + * + * To optimize for cases with contention and limited resources + * (e.g. hugepage-backed or non-overcommit arenas), each fill-iteration + * gets one chance of slab_alloc, and a retry of bin local resources + * after the slab allocation (regardless if slab_alloc failed, because + * the bin lock is dropped during the slab allocation). + * + * In other words, new slab allocation is allowed, as long as there was + * progress since the previous slab_alloc. This is tracked with + * made_progress below, initialized to true to jump start the first + * iteration. + * + * In other words (again), the loop will only terminate early (i.e. stop + * with filled < nfill) after going through the three steps: a) bin + * local exhausted, b) unlock and slab_alloc returns null, c) re-lock + * and bin local fails again. + */ + bool made_progress = true; + edata_t *fresh_slab = NULL; + bool alloc_and_retry = false; + unsigned filled = 0; + + bin_t *bin; unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); +label_refill: + bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + while (filled < nfill) { + /* Try batch-fill from slabcur first. */ + edata_t *slabcur = bin->slabcur; + if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { + unsigned tofill = nfill - filled; + unsigned nfree = edata_nfree_get(slabcur); + unsigned cnt = tofill < nfree ? tofill : nfree; + + arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, + empty_position - tofill); + made_progress = true; + filled += cnt; + continue; + } + /* Next try refilling slabcur from nonfull slabs. */ + if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + assert(bin->slabcur != NULL); + continue; + } - void **empty_position = cache_bin_empty_position_get(tbin, binind); - for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> - tcache->lg_fill_div[binind]); i < nfill; i += cnt) { - edata_t *slab; - if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > - 0) { - unsigned tofill = nfill - i; - cnt = tofill < edata_nfree_get(slab) ? 
- tofill : edata_nfree_get(slab); - arena_slab_reg_alloc_batch( - slab, &bin_infos[binind], cnt, - empty_position - nfill + i); - } else { - cnt = 1; - void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, - binind, binshard); - /* - * OOM. tbin->avail isn't yet filled down to its first - * element, so the successful allocations (if any) must - * be moved just before tbin->avail before bailing out. - */ - if (ptr == NULL) { - if (i > 0) { - memmove(empty_position - i, - empty_position - nfill, - i * sizeof(void *)); - } - break; - } - /* Insert such that low regions get used first. */ - *(empty_position - nfill + i) = ptr; + /* Then see if a new slab was reserved already. */ + if (fresh_slab != NULL) { + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, + bin, binind, fresh_slab); + assert(bin->slabcur != NULL); + fresh_slab = NULL; + continue; } - if (config_fill && unlikely(opt_junk_alloc)) { - for (unsigned j = 0; j < cnt; j++) { - void* ptr = *(empty_position - nfill + i + j); - arena_alloc_junk_small(ptr, &bin_infos[binind], - true); - } + + /* Try slab_alloc if made progress (or never did slab_alloc). */ + if (made_progress) { + assert(bin->slabcur == NULL); + assert(fresh_slab == NULL); + alloc_and_retry = true; + /* Alloc a new slab then come back. */ + break; } - } - if (config_stats) { - bin->stats.nmalloc += i; + + assert(fresh_slab == NULL); + /* + * OOM. tbin->avail isn't yet filled down to its first element, + * so the successful allocations (if any) must be moved just + * before tbin->avail before bailing out. + */ + if (filled > 0) { + memmove(empty_position - filled, empty_position - nfill, + filled * sizeof(void *)); + } + assert(!alloc_and_retry); + break; + } /* while (filled < nfill) loop. */ + + if (config_stats && !alloc_and_retry) { + bin->stats.nmalloc += filled; bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.curregs += i; + bin->stats.curregs += filled; bin->stats.nfills++; tbin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); - cache_bin_ncached_set(tbin, binind, i); + + if (alloc_and_retry) { + assert(fresh_slab == NULL); + assert(filled < nfill); + assert(made_progress); + + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /* fresh_slab NULL case handled in the for loop. */ + + alloc_and_retry = false; + made_progress = false; + goto label_refill; + } + assert(filled == nfill || (fresh_slab == NULL && !made_progress)); + + /* Release if allocated but not used. */ + if (fresh_slab != NULL) { + assert(edata_nfree_get(fresh_slab) == bin_info->nregs); + arena_slab_dalloc(tsdn, arena, fresh_slab); + fresh_slab = NULL; + } + + if (config_fill && unlikely(opt_junk_alloc)) { + for (unsigned i = 0; i < filled; i++) { + void *ptr = *(empty_position - nfill + filled + i); + arena_alloc_junk_small(ptr, bin_info, true); + } + } + cache_bin_ncached_set(tbin, binind, filled); arena_decay_tick(tsdn, arena); } @@ -1443,55 +1471,80 @@ arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = arena_dalloc_junk_small_impl; +/* + * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill + * bin->slabcur if necessary. 
+ */ static void * -arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { - void *ret; - bin_t *bin; - size_t usize; - edata_t *slab; +arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { + if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + return NULL; + } + } + assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +static void * +arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < SC_NBINS); - usize = sz_index2size(binind); + const bin_info_t *bin_info = &bin_infos[binind]; + size_t usize = sz_index2size(binind); unsigned binshard; - bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - - if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); - } else { - ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); - } + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + edata_t *fresh_slab = NULL; + void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); - return NULL; + /******************************/ + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /********************************/ + malloc_mutex_lock(tsdn, &bin->lock); + /* Retry since the lock was dropped. */ + ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + if (ret == NULL) { + if (fresh_slab == NULL) { + /* OOM */ + malloc_mutex_unlock(tsdn, &bin->lock); + return NULL; + } + ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin, + binind, fresh_slab); + fresh_slab = NULL; + } } - if (config_stats) { bin->stats.nmalloc++; bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(tsdn, &bin->lock); + if (fresh_slab != NULL) { + arena_slab_dalloc(tsdn, arena, fresh_slab); + } if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &bin_infos[binind], false); + arena_alloc_junk_small(ret, bin_info, false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], - true); + arena_alloc_junk_small(ret, bin_info, true); } memset(ret, 0, usize); } - arena_decay_tick(tsdn, arena); + return ret; } @@ -1625,21 +1678,6 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { } static void -arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin) { - assert(slab != bin->slabcur); - - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - arena_slab_dalloc(tsdn, arena, slab); - /****************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - bin->stats.curslabs--; - } -} - -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { assert(edata_nfree_get(slab) > 0); @@ -1667,20 +1705,31 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, } static void +arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + + assert(slab != bin->slabcur); + if (config_stats) { + bin->stats.curslabs--; + } +} + +/* Returns true if arena_slab_dalloc must be called on slab 
*/ +static bool arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *slab, void *ptr, bool junked) { - slab_data_t *slab_data = edata_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; - if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); } + arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); - arena_slab_reg_dalloc(slab, slab_data, ptr); + bool ret = false; unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab(tsdn, arena, slab, bin); + arena_dalloc_bin_slab_prepare(tsdn, slab, bin); + ret = true; } else if (nfree == 1 && slab != bin->slabcur) { arena_bin_slabs_full_remove(arena, bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); @@ -1690,13 +1739,15 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, bin->stats.ndalloc++; bin->stats.curregs--; } + + return ret; } -void +bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *edata, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, - true); + return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, + ptr, true); } static void @@ -1706,9 +1757,13 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, - false); + bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, + ptr, false); malloc_mutex_unlock(tsdn, &bin->lock); + + if (ret) { + arena_slab_dalloc(tsdn, arena, edata); + } } void diff --git a/src/tcache.c b/src/tcache.c index 3d96512..27ac5c2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,8 +142,6 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { - bool merged_stats = false; - assert(binind < SC_NBINS); cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); assert((cache_bin_sz_t)rem <= ncached); @@ -154,27 +152,30 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + tsdn_t *tsdn = tsd_tsdn(tsd); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd_tsdn(tsd), tbin, binind, - nflush, item_edata); + tbin_edatas_lookup_size_check(tsdn, tbin, binind, nflush, + item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsd_tsdn(tsd), - *(bottom_item - i)); + item_edata[i] = iealloc(tsdn, *(bottom_item - i)); } } + + bool merged_stats = false; + unsigned dalloc_count = 0; + VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); while (nflush > 0) { /* Lock the arena bin associated with the first object. 
*/ edata_t *edata = item_edata[0]; unsigned bin_arena_ind = edata_arena_ind_get(edata); - arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, - false); + arena_t *bin_arena = arena_get(tsdn, bin_arena_ind, false); unsigned binshard = edata_binshard_get(edata); assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; bin->stats.nflushes++; @@ -189,8 +190,10 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (edata_arena_ind_get(edata) == bin_arena_ind && edata_binshard_get(edata) == binshard) { - arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, bin, binind, edata, ptr); + if (arena_dalloc_bin_junked_locked(tsdn, + bin_arena, bin, binind, edata, ptr)) { + dalloc_slabs[dalloc_count++] = edata; + } } else { /* * This object was allocated via a different @@ -203,22 +206,28 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); + malloc_mutex_unlock(tsdn, &bin->lock); + arena_decay_ticks(tsdn, bin_arena, nflush - ndeferred); nflush = ndeferred; } + /* Handle all deferred slab dalloc. */ + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind, + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); } memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * -- cgit v0.12 From bc05ecebf66531ebed82ad630d096061087ea18d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 5 Feb 2020 15:33:31 -0800 Subject: Add const qualifier in assert_cmp() --- test/include/test/test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 07f58a4..9081716 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,8 +1,8 @@ #define ASSERT_BUFSIZE 256 #define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) 
do { \ - t a_ = (a); \ - t b_ = (b); \ + const t a_ = (a); \ + const t b_ = (b); \ if (!(a_ cmp b_)) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ -- cgit v0.12 From 68e8ddcaffeee1f2a510e0fc00eb510001a4eff4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 16:05:11 -0800 Subject: Add mallctl for dumping last-N profiling records --- src/ctl.c | 33 +++++++++ test/unit/prof_recent.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 302cb9d..29909df 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -237,6 +237,7 @@ CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) +CTL_PROTO(experimental_prof_recent_alloc_dump) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -631,6 +632,7 @@ static const ctl_indexed_node_t experimental_arenas_node[] = { static const ctl_named_node_t experimental_prof_recent_node[] = { {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, + {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, }; static const ctl_named_node_t experimental_node[] = { @@ -3549,3 +3551,34 @@ experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +typedef struct write_cb_packet_s write_cb_packet_t; +struct write_cb_packet_s { + void (*write_cb)(void *, const char *); + void *cbopaque; +}; + +static int +experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!(config_prof && opt_prof)) { + ret = ENOENT; + goto label_return; + } + + assert(sizeof(write_cb_packet_t) == sizeof(void *) * 2); + + WRITEONLY(); + write_cb_packet_t write_cb_packet; + ASSURED_WRITE(write_cb_packet, write_cb_packet_t); + + prof_recent_alloc_dump(tsd, write_cb_packet.write_cb, + write_cb_packet.cbopaque); + + ret = 0; + +label_return: + return ret; +} diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index c132452..3c10618 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -381,6 +381,197 @@ TEST_END #undef NTH_REQ_SIZE +#define DUMP_OUT_SIZE 4096 +static char dump_out[DUMP_OUT_SIZE]; +static size_t dump_out_len = 0; + +static void test_dump_write_cb(void *not_used, const char *str) { + size_t len = strlen(str); + assert(dump_out_len + len < DUMP_OUT_SIZE); + memcpy(dump_out + dump_out_len, str, len + 1); + dump_out_len += len; +} + +static void call_dump() { + static void *in[2] = {test_dump_write_cb, NULL}; + dump_out_len = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", + NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); +} + +typedef struct { + size_t size; + bool released; +} confirm_record_t; + +#define DUMP_ERROR "Dump output is wrong" + +static void confirm_record(const char *template, + const confirm_record_t *records, const size_t n_records) { + static const char *types[2] = {"alloc", "dalloc"}; + static char buf[64]; + + /* + * The template string would be in the form of: + * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[]}", + * and dump_out would be in the form of: + * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". + * Using "- 2" serves to cut right before the ending "]}". 
+ */ + assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + DUMP_ERROR); + assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + template + strlen(template) - 2, 2), 0, DUMP_ERROR); + + const char *start = dump_out + strlen(template) - 2; + const char *end = dump_out + strlen(dump_out) - 2; + const confirm_record_t *record; + for (record = records; record < records + n_records; ++record) { + +#define ASSERT_CHAR(c) do { \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ +} while (0) + +#define ASSERT_STR(s) do { \ + const size_t len = strlen(s); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + start += len; \ +} while (0) + +#define ASSERT_FORMATTED_STR(s, ...) do { \ + malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ + ASSERT_STR(buf); \ +} while (0) + + if (record != records) { + ASSERT_CHAR(','); + } + + ASSERT_CHAR('{'); + + ASSERT_STR("\"size\""); + ASSERT_CHAR(':'); + ASSERT_FORMATTED_STR("%zu", record->size); + ASSERT_CHAR(','); + + ASSERT_STR("\"usize\""); + ASSERT_CHAR(':'); + ASSERT_FORMATTED_STR("%zu", sz_s2u(record->size)); + ASSERT_CHAR(','); + + ASSERT_STR("\"released\""); + ASSERT_CHAR(':'); + ASSERT_STR(record->released ? "true" : "false"); + ASSERT_CHAR(','); + + const char **type = types; + while (true) { + ASSERT_FORMATTED_STR("\"%s_thread_uid\"", *type); + ASSERT_CHAR(':'); + while (isdigit(*start)) { + ++start; + } + ASSERT_CHAR(','); + + ASSERT_FORMATTED_STR("\"%s_time\"", *type); + ASSERT_CHAR(':'); + while (isdigit(*start)) { + ++start; + } + ASSERT_CHAR(','); + + ASSERT_FORMATTED_STR("\"%s_trace\"", *type); + ASSERT_CHAR(':'); + ASSERT_CHAR('['); + while (isdigit(*start) || *start == 'x' || + (*start >= 'a' && *start <= 'f') || + *start == '\"' || *start == ',') { + ++start; + } + ASSERT_CHAR(']'); + + if (strcmp(*type, "dalloc") == 0) { + break; + } + + assert(strcmp(*type, "alloc") == 0); + if (!record->released) { + break; + } + + ASSERT_CHAR(','); + ++type; + } + + ASSERT_CHAR('}'); + +#undef ASSERT_FORMATTED_STR +#undef ASSERT_STR +#undef ASSERT_CHAR + + } + assert_ptr_eq(record, records + n_records, DUMP_ERROR); + assert_ptr_eq(start, end, DUMP_ERROR); +} + +TEST_BEGIN(test_prof_recent_alloc_dump) { + test_skip_if(!config_prof); + + tsd_t *tsd = tsd_fetch(); + confirm_prof_setup(tsd); + + ssize_t future; + void *p, *q; + confirm_record_t records[2]; + + future = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + call_dump(); + assert_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", + DUMP_ERROR); + + future = 2; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + call_dump(); + const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; + assert_str_eq(dump_out, template, DUMP_ERROR); + + p = malloc(7); + call_dump(); + records[0].size = 7; + records[0].released = false; + confirm_record(template, records, 1); + + q = malloc(17); + call_dump(); + records[1].size = 17; + records[1].released = false; + confirm_record(template, records, 2); + + free(q); + call_dump(); + records[1].released = true; + confirm_record(template, records, 2); + + free(p); + call_dump(); + records[0].released = true; + confirm_record(template, records, 2); + + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + 
confirm_prof_setup(tsd); +} +TEST_END + +#undef DUMP_ERROR +#undef DUMP_OUT_SIZE + #define N_THREADS 16 #define N_PTRS 512 #define N_CTLS 8 @@ -500,5 +691,6 @@ main(void) { test_prof_recent_off, test_prof_recent_on, test_prof_recent_alloc, + test_prof_recent_alloc_dump, test_prof_recent_stress); } -- cgit v0.12 From 0f686e82a37e49af6caee2d469f2a2a88e1fbf7c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 13 Feb 2020 20:04:22 -0800 Subject: Avoid variable length array with length 0. --- src/tcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 27ac5c2..e8a4cc5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -149,7 +149,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(edata_t *, item_edata, nflush); + /* Variable length array must have > 0 length. */ + VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); tsdn_t *tsdn = tsd_tsdn(tsd); @@ -250,7 +251,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(edata_t *, item_edata, nflush); + /* Variable length array must have > 0 length. */ + VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK -- cgit v0.12 From 01f255161c97fac5a64517a0366d59eb8afdeae0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jan 2020 13:55:46 -0800 Subject: Add emap, for tracking extent locking. --- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 26 +-- include/jemalloc/internal/emap.h | 33 ++++ include/jemalloc/internal/extent.h | 2 - .../internal/jemalloc_internal_inlines_b.h | 3 +- include/jemalloc/internal/witness.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 15 +- src/emap.c | 127 +++++++++++++ src/extent.c | 211 +++++---------------- src/jemalloc.c | 20 +- src/large.c | 2 +- src/tcache.c | 2 +- test/unit/arena_reset.c | 2 +- 17 files changed, 257 insertions(+), 197 deletions(-) create mode 100644 include/jemalloc/internal/emap.h create mode 100644 src/emap.c diff --git a/Makefile.in b/Makefile.in index eda9c7a..984bd72 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,6 +110,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/edata.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ + $(srcroot)src/emap.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 844e045..b39578c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -188,7 +188,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, + szind_t szind = rtree_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind != SC_NSIZES); @@ -211,7 +211,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { edata_t *edata; szind_t 
szind; - if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, false, &edata, &szind)) { return 0; } @@ -247,11 +247,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { szind_t szind; bool slab; - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - true, &szind, &slab); + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); @@ -302,13 +302,13 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(szind != SC_NSIZES); } else { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); } if (config_debug) { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); @@ -345,7 +345,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); assert(szind == sz_size2index(size)); @@ -353,7 +353,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { if (config_debug) { edata_t *edata = rtree_edata_read(tsdn, - &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(slab == edata_slab_get(edata)); } @@ -388,8 +388,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &local_ctx.szind, + rtree_szind_slab_read(tsdn, &emap_global.rtree, + rtree_ctx, (uintptr_t)ptr, true, &local_ctx.szind, &local_ctx.slab); assert(local_ctx.szind == sz_size2index(size)); alloc_ctx = &local_ctx; @@ -407,10 +407,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_debug) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); edata_t *edata = rtree_edata_read(tsdn, - &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h new file mode 100644 index 0000000..9a37b94 --- /dev/null +++ b/include/jemalloc/internal/emap.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_EMAP_H +#define JEMALLOC_INTERNAL_EMAP_H + +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/rtree.h" + +typedef struct emap_s emap_t; +struct emap_s { + rtree_t rtree; + /* Keyed by the address of the edata_t being protected. 
*/ + mutex_pool_t mtx_pool; +}; + +extern emap_t emap_global; + +bool emap_init(emap_t *emap); + +void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); + +void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2); +void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2); + +edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); + +bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, + bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); + +#endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index b89708a..d0ba70b 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,8 +19,6 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -extern rtree_t extents_rtree; - edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index ebfb331..00fb604 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/rtree.h" @@ -81,7 +82,7 @@ iealloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + return rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 083bdcc..b5fa1c0 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -45,7 +45,7 @@ #define WITNESS_RANK_EXTENTS 15U #define WITNESS_RANK_EDATA_CACHE 16U -#define WITNESS_RANK_EXTENT_POOL 17U +#define WITNESS_RANK_EMAP 17U #define WITNESS_RANK_RTREE 18U #define WITNESS_RANK_BASE 19U #define WITNESS_RANK_ARENA_LARGE 20U diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d8b4898..d98bb85 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -50,6 +50,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 9b0445f..fd3e11c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b0d32d9..b59d411 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -50,6 +50,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 
9b0445f..fd3e11c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index 2234894..3206a9a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1112,8 +1112,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, + rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, + &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { @@ -1601,13 +1602,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - szind, false); + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, szind, false); prof_idump_rollback(tsdn, usize); @@ -1622,8 +1623,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { edata_szind_set(edata, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - SC_NBINS, false); + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, SC_NBINS, false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c new file mode 100644 index 0000000..ea3cce0 --- /dev/null +++ b/src/emap.c @@ -0,0 +1,127 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/emap.h" + +emap_t emap_global; + +enum emap_lock_result_e { + emap_lock_result_success, + emap_lock_result_failure, + emap_lock_result_no_extent +}; +typedef enum emap_lock_result_e emap_lock_result_t; + +bool +emap_init(emap_t *emap) { + bool err; + err = rtree_new(&emap->rtree, true); + if (err) { + return true; + } + err = mutex_pool_init(&emap->mtx_pool, "emap_mutex_pool", + WITNESS_RANK_EMAP); + if (err) { + return true; + } + return false; +} + +void +emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(edata != NULL); + mutex_pool_lock(tsdn, &emap->mtx_pool, (uintptr_t)edata); +} + +void +emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(edata != NULL); + mutex_pool_unlock(tsdn, &emap->mtx_pool, (uintptr_t)edata); +} + +void +emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_lock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +void +emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_unlock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +static inline emap_lock_result_t +emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, 
rtree_leaf_elm_t *elm, + edata_t **result, bool inactive_only) { + edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, + elm, true); + + /* Slab implies active extents and should be skipped. */ + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &emap->rtree, elm, true))) { + return emap_lock_result_no_extent; + } + + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->edata mapping. We have to recheck while holding the lock. + */ + emap_lock_edata(tsdn, emap, edata1); + edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, elm, + true); + + if (edata1 == edata2) { + *result = edata1; + return emap_lock_result_success; + } else { + emap_unlock_edata(tsdn, emap, edata1); + return emap_lock_result_failure; + } +} + +/* + * Returns a pool-locked edata_t * if there's one associated with the given + * address, and NULL otherwise. + */ +edata_t * +emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + void *addr, bool inactive_only) { + edata_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + emap_lock_result_t lock_result; + do { + lock_result = emap_try_lock_rtree_leaf_elm(tsdn, emap, elm, + &ret, inactive_only); + } while (lock_result == emap_lock_result_failure); + return ret; +} + +bool +emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + const edata_t *edata, bool dependent, bool init_missing, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) { + return true; + } + assert(*r_elm_a != NULL); + + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; + } + assert(*r_elm_b != NULL); + + return false; +} diff --git a/src/extent.c b/src/extent.c index 07c0bd2..bbebf9e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -2,20 +2,15 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" /******************************************************************************/ /* Data. */ -rtree_t extents_rtree; -/* Keyed by the address of the edata_t being protected. 
*/ -mutex_pool_t extent_mutex_pool; - size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -58,88 +53,6 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -typedef enum { - lock_result_success, - lock_result_failure, - lock_result_no_extent -} lock_result_t; - -static inline void -extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static inline void -extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static lock_result_t -extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, - elm, true); - - /* Slab implies active extents and should be skipped. */ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &extents_rtree, elm, true))) { - return lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->edata mapping. We have to recheck while holding the lock. - */ - extent_lock_edata(tsdn, edata1); - edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, - true); - - if (edata1 == edata2) { - *result = edata1; - return lock_result_success; - } else { - extent_unlock_edata(tsdn, edata1); - return lock_result_failure; - } -} - -/* - * Returns a pool-locked edata_t * if there's one associated with the given - * address, and NULL otherwise. 
- */ -static edata_t * -extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, - bool inactive_only) { - edata_t *ret = NULL; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - lock_result_t lock_result; - do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, - inactive_only); - } while (lock_result == lock_result_failure); - return ret; -} - static void extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { @@ -357,34 +270,14 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); } -static bool -extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const edata_t *edata, bool dependent, bool init_missing, - rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) { - return true; - } - assert(*r_elm_a != NULL); - - *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_last_get(edata), dependent, init_missing); - if (!dependent && *r_elm_b == NULL) { - return true; - } - assert(*r_elm_b != NULL); - - return false; -} - static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); + rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_a, edata, szind, + slab); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, - slab); + rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_b, edata, + szind, slab); } } @@ -395,7 +288,7 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, /* Register interior. */ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, rtree_ctx, + rtree_write(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE), edata, szind, true); } @@ -448,11 +341,11 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - extent_lock_edata(tsdn, edata); + emap_lock_edata(tsdn, &emap_global, edata); - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, - &elm_a, &elm_b)) { - extent_unlock_edata(tsdn, edata); + if (emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, + false, true, &elm_a, &elm_b)) { + emap_unlock_edata(tsdn, &emap_global, edata); return true; } @@ -463,7 +356,7 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } - extent_unlock_edata(tsdn, edata); + emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -503,7 +396,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, assert(edata_slab_get(edata)); for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, rtree_ctx, + rtree_clear(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE)); } @@ -517,10 +410,10 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, - &elm_a, &elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, + true, false, &elm_a, &elm_b); - extent_lock_edata(tsdn, edata); + emap_lock_edata(tsdn, &emap_global, edata); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); if (edata_slab_get(edata)) { @@ -528,7 +421,7 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { edata_slab_set(edata, false); } - extent_unlock_edata(tsdn, edata); + emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -577,8 +470,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, - false); + edata = emap_lock_edata_from_addr(tsdn, &emap_global, rtree_ctx, + new_addr, false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -592,7 +485,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - extent_unlock_edata(tsdn, unlock_edata); + emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { edata = eset_fit(&ecache->eset, esize, alignment, @@ -692,11 +585,12 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ edata_szind_set(*edata, szind); if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_addr_get(*edata), szind, slab); + rtree_szind_slab_update(tsdn, &emap_global.rtree, + rtree_ctx, (uintptr_t)edata_addr_get(*edata), szind, + slab); if (slab && edata_size_get(*edata) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, + rtree_szind_slab_update(tsdn, + &emap_global.rtree, rtree_ctx, (uintptr_t)edata_past_get(*edata) - (uintptr_t)PAGE, szind, slab); } @@ -760,8 +654,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); - assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, - false) == NULL); + assert(emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, leak, 
false) == NULL); } return NULL; } @@ -1119,8 +1013,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, again = false; /* Try to coalesce forward. */ - edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_past_get(edata), inactive_only); + edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, edata_past_get(edata), inactive_only); if (next != NULL) { /* * ecache->mtx only protects against races for @@ -1130,7 +1024,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - extent_unlock_edata(tsdn, next); + emap_unlock_edata(tsdn, &emap_global, next); if (can_coalesce && !extent_coalesce(tsdn, edata_cache, ehooks, ecache, edata, next, true, @@ -1145,12 +1039,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } /* Try to coalesce backward. */ - edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_before_get(edata), inactive_only); + edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - extent_unlock_edata(tsdn, prev); + emap_unlock_edata(tsdn, &emap_global, prev); if (can_coalesce && !extent_coalesce(tsdn, edata_cache, ehooks, ecache, edata, prev, false, @@ -1210,7 +1104,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_slab_set(edata, false); } - assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + assert(rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); if (!ecache->delay_coalesce) { @@ -1449,19 +1343,19 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_committed_get(edata), edata_dumpable_get(edata), EXTENT_NOT_HEAD); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, - true, &lead_elm_a, &lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, + &lead, false, true, &lead_elm_a, &lead_elm_b); } rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, trail, false, + true, &trail_elm_a, &trail_elm_b); if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL || trail_elm_b == NULL) { goto label_error_b; } - extent_lock_edata2(tsdn, edata, trail); + emap_lock_edata2(tsdn, &emap_global, edata, trail); bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1478,11 +1372,11 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, szind_b, slab_b); - extent_unlock_edata2(tsdn, edata, trail); + emap_unlock_edata2(tsdn, &emap_global, edata, trail); return trail; label_error_c: - extent_unlock_edata2(tsdn, edata, trail); + emap_unlock_edata2(tsdn, &emap_global, edata, trail); label_error_b: edata_cache_put(tsdn, edata_cache, trail); label_error_a: @@ -1523,19 +1417,19 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, - &a_elm_b); - 
extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, - &b_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, a, true, + false, &a_elm_a, &a_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, b, true, + false, &b_elm_a, &b_elm_b); - extent_lock_edata2(tsdn, a, b); + emap_lock_edata2(tsdn, &emap_global, a, b); if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, + rtree_leaf_elm_write(tsdn, &emap_global.rtree, a_elm_b, NULL, SC_NSIZES, false); } if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, + rtree_leaf_elm_write(tsdn, &emap_global.rtree, b_elm_a, NULL, SC_NSIZES, false); } else { b_elm_b = b_elm_a; @@ -1550,7 +1444,7 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, false); - extent_unlock_edata2(tsdn, a, b); + emap_unlock_edata2(tsdn, &emap_global, a, b); edata_cache_put(tsdn, edata_cache, b); @@ -1567,15 +1461,6 @@ bool extent_boot(void) { assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); - if (rtree_new(&extents_rtree, true)) { - return true; - } - - if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", - WITNESS_RANK_EXTENT_POOL)) { - return true; - } - if (have_dss) { extent_dss_boot(); } diff --git a/src/jemalloc.c b/src/jemalloc.c index ddb29e3..8f34989 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/hook.h" @@ -1571,6 +1572,9 @@ malloc_init_hard_a0_locked() { if (base_boot(TSDN_NULL)) { return true; } + if (emap_init(&emap_global)) { + return true; + } if (extent_boot()) { return true; } @@ -2565,7 +2569,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); @@ -2619,15 +2623,16 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx_t dbg_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, - true, &dbg_ctx.szind, &dbg_ctx.slab); + &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); assert(dbg_ctx.szind == ctx->szind); assert(dbg_ctx.slab == ctx->slab); } } else if (opt_prof) { ctx = &alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, &ctx->slab); /* Small alloc may have !slab (sampled). 
*/ @@ -2699,7 +2704,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, &szind, &slab); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, &szind, + &slab); /* Note: profiled objects will have alloc_ctx.slab set */ if (unlikely(!res || !slab)) { @@ -3142,7 +3148,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); @@ -3421,7 +3427,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); diff --git a/src/large.c b/src/large.c index e133e19..2e52098 100644 --- a/src/large.c +++ b/src/large.c @@ -179,7 +179,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, false); if (config_stats && new_mapping) { diff --git a/src/tcache.c b/src/tcache.c index e8a4cc5..9146f24 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t sz_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_edata_szind_read(tsdn, &extents_rtree, + rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)*(bottom_item - i), true, &edatas[i], &szind); sz_sum -= szind; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 854799d..a1f1d07 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -65,7 +65,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { edata_t *edata; szind_t szind; - if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, false, &edata, &szind)) { return 0; } -- cgit v0.12 From ca21ce4071d14b3cbbb88697bfd76a30b9de7ac8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 12:31:19 -0800 Subject: Emap: Move in write_acquired from extent. 
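As a rough illustration only (not part of the original commit), the sketch below shows the lookup-then-write pairing that this patch moves behind the emap interface, roughly as extent_register_impl() uses it after the change. The helper name emap_register_sketch and its standalone framing are assumptions; emap_global, the emap_* calls, and the tsdn_rtree_ctx() fallback idiom are taken from this series, and the usual jemalloc-internal preamble includes are assumed to be sufficient.

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/emap.h"

/* Hypothetical helper: register one edata's boundaries in the global emap. */
static bool
emap_register_sketch(tsdn_t *tsdn, edata_t *edata, szind_t szind, bool slab) {
    rtree_ctx_t rtree_ctx_fallback;
    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);

    /* Look up (creating if missing) the leaf elements for both boundaries. */
    rtree_leaf_elm_t *elm_a, *elm_b;
    if (emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata,
        /* dependent */ false, /* init_missing */ true, &elm_a, &elm_b)) {
        return true;    /* Resource exhaustion. */
    }
    /* Publish edata, szind, and the slab bit through both leaf elements. */
    emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, edata,
        szind, slab);
    return false;
}

A later patch in this series makes both of these calls internal to emap.c again, once every caller goes through the higher-level register/deregister entry points.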
--- include/jemalloc/internal/emap.h | 5 +++++ src/emap.c | 10 ++++++++++ src/extent.c | 29 ++++++++++------------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9a37b94..b9624d1 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -30,4 +30,9 @@ bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); +/* Only temporarily public; this will be internal eventually. */ +void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, + rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, + szind_t szind, bool slab); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index ea3cce0..4ed9ff1 100644 --- a/src/emap.c +++ b/src/emap.c @@ -125,3 +125,13 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return false; } + +void +emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); + if (elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, edata, szind, + slab); + } +} diff --git a/src/extent.c b/src/extent.c index bbebf9e..fc44925 100644 --- a/src/extent.c +++ b/src/extent.c @@ -271,17 +271,6 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { } static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_a, edata, szind, - slab); - if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_b, edata, - szind, slab); - } -} - -static void extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind) { assert(edata_slab_get(edata)); @@ -351,7 +340,8 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); + emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, edata, + szind, slab); if (slab) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } @@ -415,7 +405,8 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, &emap_global, edata); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); + emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, NULL, + SC_NSIZES, false); if (edata_slab_get(edata)) { extent_interior_deregister(tsdn, rtree_ctx, edata); edata_slab_set(edata, false); @@ -1367,10 +1358,10 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_size_set(edata, size_a); edata_szind_set(edata, szind_a); - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, - szind_a, slab_a); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, - szind_b, slab_b); + emap_rtree_write_acquired(tsdn, &emap_global, lead_elm_a, lead_elm_b, + edata, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, &emap_global, trail_elm_a, trail_elm_b, + trail, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); @@ -1441,8 +1432,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t 
*ehooks, edata_cache_t *edata_cache, edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, - false); + emap_rtree_write_acquired(tsdn, &emap_global, a_elm_a, b_elm_b, a, + SC_NSIZES, false); emap_unlock_edata2(tsdn, &emap_global, a, b); -- cgit v0.12 From d05b61db4a4ac9ba498d2a478f65035935d776ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 12:40:07 -0800 Subject: Emap: Move extent boundary registration in. --- include/jemalloc/internal/emap.h | 8 ++++++++ src/emap.c | 13 +++++++++++++ src/extent.c | 12 +++++------- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b9624d1..93fa472 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -35,4 +35,12 @@ void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab); +/* + * Associate the given edata with its beginning and end address, setting the + * szind and slab info appropriately. + * Returns true on error (i.e. resource exhaustion). + */ +bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind, bool slab); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 4ed9ff1..4371c4a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -135,3 +135,16 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, slab); } } + +bool +emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_t *elm_a, *elm_b; + bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, + false, true, &elm_a, &elm_b); + if (err) { + return true; + } + emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); + return false; +} diff --git a/src/extent.c b/src/extent.c index fc44925..4c4e16a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -324,7 +324,6 @@ static bool extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; /* * We need to hold the lock to protect against a concurrent coalesce @@ -332,16 +331,15 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { */ emap_lock_edata(tsdn, &emap_global, edata); - if (emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, - false, true, &elm_a, &elm_b)) { + szind_t szind = edata_szind_get_maybe_invalid(edata); + bool slab = edata_slab_get(edata); + + if (emap_register_boundary(tsdn, &emap_global, rtree_ctx, edata, szind, + slab)) { emap_unlock_edata(tsdn, &emap_global, edata); return true; } - szind_t szind = edata_szind_get_maybe_invalid(edata); - bool slab = edata_slab_get(edata); - emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, edata, - szind, slab); if (slab) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } -- cgit v0.12 From 9b5ca0b09df207de4abe02ccaedd018fc2deed77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 13:32:38 -0800 Subject: Emap: Move in slab interior registration. 
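For illustration (not part of the patch), a minimal sketch of how a slab extent is now registered end to end: the boundary registration from the previous patch, followed by the interior registration added here. The wrapper name emap_register_slab_sketch is hypothetical; the emap_* signatures (still taking an explicit rtree_ctx at this point in the series) and the fallback-context idiom are as they appear in these patches, and the usual jemalloc-internal includes are assumed.

/* Hypothetical wrapper: fully register a slab extent (boundary + interior). */
static bool
emap_register_slab_sketch(tsdn_t *tsdn, edata_t *edata, szind_t szind) {
    rtree_ctx_t rtree_ctx_fallback;
    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);

    /* Registering the two boundary pages is the only step that can fail. */
    if (emap_register_boundary(tsdn, &emap_global, rtree_ctx, edata, szind,
        /* slab */ true)) {
        return true;
    }
    /*
     * Interior registration cannot fail; see the rationale in the comment
     * this patch adds to emap.h.
     */
    emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, szind);
    return false;
}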
--- include/jemalloc/internal/emap.h | 22 ++++++++++++++++++++++ src/emap.c | 13 +++++++++++++ src/extent.c | 22 ++++++---------------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 93fa472..e8b422e 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -43,4 +43,26 @@ void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind, bool slab); +/* + * Does the same thing, but with the interior of the range, for slab + * allocations. + * + * You might wonder why we don't just have a single emap_register function that + * does both depending on the value of 'slab'. The answer is twofold: + * - As a practical matter, in places like the extract->split->commit pathway, + * we defer the interior operation until we're sure that the commit won't fail + * (but we have to register the split boundaries there). + * - In general, we're trying to move to a world where the page-specific + * allocator doesn't know as much about how the pages it allocates will be + * used, and passing a 'slab' parameter everywhere makes that more + * complicated. + * + * Unlike the boundary version, this function can't fail; this is because slabs + * can't get big enough to touch a new page that neither of the boundaries + * touched, so no allocation is necessary to fill the interior once the boundary + * has been touched. + */ +void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 4371c4a..0a37d17 100644 --- a/src/emap.c +++ b/src/emap.c @@ -148,3 +148,16 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } + +void +emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind) { + assert(edata_slab_get(edata)); + + /* Register interior. */ + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE), edata, szind, true); + } +} diff --git a/src/extent.c b/src/extent.c index 4c4e16a..9975dd2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -271,19 +271,6 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { } static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, - szind_t szind) { - assert(edata_slab_get(edata)); - - /* Register interior. */ - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), edata, szind, true); - } -} - -static void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. 
*/ @@ -341,7 +328,8 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { } if (slab) { - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -704,7 +692,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } if (*zero) { @@ -867,7 +856,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &rtree_ctx_fallback); edata_slab_set(edata, true); - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); -- cgit v0.12 From 6513d9d923d4e32775612614326ff1889807c840 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 14:55:36 -0800 Subject: Emap: Move over deregistration boundary functions. --- include/jemalloc/internal/emap.h | 3 +++ src/emap.c | 11 +++++++++++ src/extent.c | 8 +------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index e8b422e..eef33f2 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -65,4 +65,7 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind); +void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, edata_t *edata); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 0a37d17..d54cf7e 100644 --- a/src/emap.c +++ b/src/emap.c @@ -161,3 +161,14 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, LG_PAGE), edata, szind, true); } } + +void +emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata) { + rtree_leaf_elm_t *elm_a, *elm_b; + + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, + true, false, &elm_a, &elm_b); + emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, + false); +} diff --git a/src/extent.c b/src/extent.c index 9975dd2..2b2ba7e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -385,19 +385,13 @@ static void extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, - true, false, &elm_a, &elm_b); emap_lock_edata(tsdn, &emap_global, edata); - - emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, NULL, - SC_NSIZES, false); + emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); if (edata_slab_get(edata)) { extent_interior_deregister(tsdn, rtree_ctx, edata); edata_slab_set(edata, false); } - emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump) { -- cgit v0.12 From 44f5f5360598b57b9d701f6b544f5cd2acd4df9c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 15:02:52 -0800 Subject: Emap: Move over deregistration functions. 
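Purely for readability (not part of the commit), here is the resulting deregistration path collected out of diff form; it mirrors extent_deregister_impl() as it stands after this patch, minus the prof gdump accounting. The wrapper name is hypothetical; the emap_* and edata_* calls and the locking discipline are exactly those shown in the diff, and the usual jemalloc-internal includes are assumed.

/* Hypothetical wrapper mirroring extent_deregister_impl() after this patch. */
static void
emap_deregister_sketch(tsdn_t *tsdn, edata_t *edata) {
    rtree_ctx_t rtree_ctx_fallback;
    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);

    /* Hold the edata lock so lookups never see a partially removed extent. */
    emap_lock_edata(tsdn, &emap_global, edata);
    emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata);
    if (edata_slab_get(edata)) {
        emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata);
        edata_slab_set(edata, false);
    }
    emap_unlock_edata(tsdn, &emap_global, edata);
}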
--- include/jemalloc/internal/emap.h | 2 ++ src/emap.c | 11 +++++++++++ src/extent.c | 25 ++----------------------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index eef33f2..d28a5f7 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -67,5 +67,7 @@ void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); +void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, edata_t *edata); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index d54cf7e..92814fc 100644 --- a/src/emap.c +++ b/src/emap.c @@ -172,3 +172,14 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, false); } + +void +emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata) { + assert(edata_slab_get(edata)); + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE)); + } +} diff --git a/src/extent.c b/src/extent.c index 2b2ba7e..bb88c20 100644 --- a/src/extent.c +++ b/src/extent.c @@ -358,27 +358,6 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { } /* - * Removes all pointers to the given extent from the global rtree indices for - * its interior. This is relevant for slab extents, for which we need to do - * metadata lookups at places other than the head of the extent. We deregister - * on the interior, then, when an extent moves from being an active slab to an - * inactive state. - */ -static void -extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - edata_t *edata) { - size_t i; - - assert(edata_slab_get(edata)); - - for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE)); - } -} - -/* * Removes all pointers to the given extent from the global rtree. */ static void @@ -389,7 +368,7 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, &emap_global, edata); emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); edata_slab_set(edata, false); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -1073,7 +1052,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); edata_slab_set(edata, false); } -- cgit v0.12 From 7c7b7020640488f26fb81143ab2ca7c74377580b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 13:27:21 -0800 Subject: Emap: Move over metadata splitting logic. 
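As a sketch only (not from the commit), the wrapper below shows the prepare/commit discipline this patch introduces for splits: prepare does the fallible rtree work up front, and commit publishes the new boundaries while both edatas are locked. The wrapper name and the bare placeholder comment for the extent-hook call are assumptions; the emap_split_prepare()/emap_split_commit() signatures, including the trailing ind_b argument (dropped by a later patch in the series), are as added here, and the usual jemalloc-internal includes are assumed.

/* Hypothetical wrapper showing the split prepare/commit protocol. */
static bool
emap_split_sketch(tsdn_t *tsdn, edata_t *edata, edata_t *trail, size_t size_a,
    szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b,
    unsigned ind_b) {
    rtree_ctx_t rtree_ctx_fallback;
    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);

    emap_split_prepare_t prepare;
    /* Prepare may fail (rtree leaf allocation); nothing is published yet. */
    if (emap_split_prepare(tsdn, &emap_global, rtree_ctx, &prepare, edata,
        size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b, ind_b)) {
        return true;
    }
    emap_lock_edata2(tsdn, &emap_global, edata, trail);
    /* ... the caller runs the actual extent-hook split here ... */
    emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a,
        slab_a, trail, size_b, szind_b, slab_b);
    emap_unlock_edata2(tsdn, &emap_global, edata, trail);
    return false;
}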
--- include/jemalloc/internal/emap.h | 17 +++++++++++++ src/emap.c | 53 ++++++++++++++++++++++++++++++++++++++++ src/extent.c | 44 +++++++-------------------------- 3 files changed, 79 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index d28a5f7..e5188d4 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -70,4 +70,21 @@ void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); +typedef struct emap_split_prepare_s emap_split_prepare_t; +struct emap_split_prepare_s { + rtree_leaf_elm_t *lead_elm_a; + rtree_leaf_elm_t *lead_elm_b; + rtree_leaf_elm_t *trail_elm_a; + rtree_leaf_elm_t *trail_elm_b; +}; + +bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b, unsigned ind_b); +void emap_split_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 92814fc..45673c2 100644 --- a/src/emap.c +++ b/src/emap.c @@ -183,3 +183,56 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, LG_PAGE)); } } + +bool +emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b, unsigned ind_b) { + /* + * Note that while the trail mostly inherits its attributes from the + * extent to be split, it maintains its own arena ind -- this allows + * cross-arena edata interactions, such as occur in the range ecache. + */ + edata_init(trail, ind_b, + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_dumpable_get(edata), EXTENT_NOT_HEAD); + + /* + * We use incorrect constants for things like arena ind, zero, dump, and + * commit state, and head status. This is a fake edata_t, used to + * facilitate a lookup. 
+ */ + edata_t lead; + edata_init(&lead, 0U, edata_addr_get(edata), size_a, slab_a, szind_a, 0, + extent_state_active, false, false, false, EXTENT_NOT_HEAD); + + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, + &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, false, true, + &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + + if (split_prepare->lead_elm_a == NULL + || split_prepare->lead_elm_b == NULL + || split_prepare->trail_elm_a == NULL + || split_prepare->trail_elm_b == NULL) { + return true; + } + return false; +} + +void +emap_split_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b) { + edata_size_set(lead, size_a); + edata_szind_set(lead, szind_a); + + emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, + split_prepare->lead_elm_b, lead, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, + split_prepare->trail_elm_b, trail, szind_b, slab_b); +} diff --git a/src/extent.c b/src/extent.c index bb88c20..fa9bc41 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1276,53 +1276,27 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } - edata_init(trail, ehooks_ind_get(ehooks), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_dumpable_get(edata), EXTENT_NOT_HEAD); - rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; - { - edata_t lead; - - edata_init(&lead, ehooks_ind_get(ehooks), - edata_addr_get(edata), size_a, - slab_a, szind_a, edata_sn_get(edata), - edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), edata_dumpable_get(edata), - EXTENT_NOT_HEAD); - - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, - &lead, false, true, &lead_elm_a, &lead_elm_b); - } - rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, trail, false, - true, &trail_elm_a, &trail_elm_b); - - if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL - || trail_elm_b == NULL) { + emap_split_prepare_t split_prepare; + bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, + &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, + szind_b, slab_b, ehooks_ind_get(ehooks)); + if (err) { goto label_error_b; } emap_lock_edata2(tsdn, &emap_global, edata, trail); - bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), - size_a + size_b, size_a, size_b, edata_committed_get(edata)); + err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, + size_a, size_b, edata_committed_get(edata)); if (err) { goto label_error_c; } - edata_size_set(edata, size_a); - edata_szind_set(edata, szind_a); - - emap_rtree_write_acquired(tsdn, &emap_global, lead_elm_a, lead_elm_b, - edata, szind_a, slab_a); - emap_rtree_write_acquired(tsdn, &emap_global, trail_elm_a, trail_elm_b, - trail, szind_b, slab_b); + emap_split_commit(tsdn, &emap_global, &split_prepare, edata, size_a, + szind_a, slab_a, trail, size_b, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); -- cgit v0.12 From 040eac77ccca6d07b8457237cfe939b7e182474b Mon Sep 17 
00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 13:33:05 -0800 Subject: Tell edatas their creation arena immediately. This avoids having to pass it in anywhere else. --- include/jemalloc/internal/emap.h | 2 +- src/base.c | 1 + src/emap.c | 4 ++-- src/extent.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index e5188d4..fcc9fe4 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -81,7 +81,7 @@ struct emap_split_prepare_s { bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b, unsigned ind_b); + szind_t szind_b, bool slab_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, diff --git a/src/base.c b/src/base.c index 005b0c5..c006774 100644 --- a/src/base.c +++ b/src/base.c @@ -468,6 +468,7 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { if (edata == NULL) { return NULL; } + edata_arena_ind_set(edata, ehooks_ind_get(&base->ehooks)); edata_esn_set(edata, esn); return edata; } diff --git a/src/emap.c b/src/emap.c index 45673c2..b79b66a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -188,13 +188,13 @@ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b, unsigned ind_b) { + szind_t szind_b, bool slab_b) { /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its own arena ind -- this allows * cross-arena edata interactions, such as occur in the range ecache. */ - edata_init(trail, ind_b, + edata_init(trail, edata_arena_ind_get(trail), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), diff --git a/src/extent.c b/src/extent.c index fa9bc41..e8a1257 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1281,7 +1281,7 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, emap_split_prepare_t split_prepare; bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, - szind_b, slab_b, ehooks_ind_get(ehooks)); + szind_b, slab_b); if (err) { goto label_error_b; } -- cgit v0.12 From 0586a56f39845433faa54cea5be56b80e14b2570 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 14:15:07 -0800 Subject: Emap: Move in merge functionality. 
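Again as a sketch (not part of the commit), the merge counterpart: prepare looks up the boundary elements for both extents, and commit, done under the pairwise edata lock, folds the trail into the lead and clears the stale mappings. The wrapper name is hypothetical; at this point in the series the scratch struct is still called emap_split_prepare_t (a later patch renames it to emap_prepare_t), and the usual jemalloc-internal includes are assumed.

/* Hypothetical wrapper showing the merge prepare/commit protocol. */
static void
emap_merge_sketch(tsdn_t *tsdn, edata_t *a, edata_t *b) {
    rtree_ctx_t rtree_ctx_fallback;
    rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);

    emap_split_prepare_t prepare;   /* Renamed to emap_prepare_t later on. */
    emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &prepare, a, b);

    emap_lock_edata2(tsdn, &emap_global, a, b);
    /* Folds b's size into a and clears the now-stale boundary mappings. */
    emap_merge_commit(tsdn, &emap_global, &prepare, a, b);
    emap_unlock_edata2(tsdn, &emap_global, a, b);
    /* The caller can now return b's metadata to the edata cache. */
}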
--- include/jemalloc/internal/emap.h | 4 ++++ src/emap.c | 37 +++++++++++++++++++++++++++++++++++++ src/extent.c | 28 +++------------------------- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index fcc9fe4..7835da9 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -86,5 +86,9 @@ void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); +void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index b79b66a..bde986f 100644 --- a/src/emap.c +++ b/src/emap.c @@ -236,3 +236,40 @@ emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, split_prepare->trail_elm_b, trail, szind_b, slab_b); } + +void +emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, + &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, + &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); +} + +void +emap_merge_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + if (split_prepare->lead_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, + split_prepare->lead_elm_b, NULL, SC_NSIZES, false); + } + + rtree_leaf_elm_t *merged_b; + if (split_prepare->trail_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, + split_prepare->trail_elm_a, NULL, SC_NSIZES, false); + merged_b = split_prepare->trail_elm_b; + } else { + merged_b = split_prepare->trail_elm_a; + } + + edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); + edata_szind_set(lead, SC_NSIZES); + edata_sn_set(lead, (edata_sn_get(lead) < edata_sn_get(trail)) ? 
+ edata_sn_get(lead) : edata_sn_get(trail)); + edata_zeroed_set(lead, edata_zeroed_get(lead) + && edata_zeroed_get(trail)); + + emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, + merged_b, lead, SC_NSIZES, false); +} diff --git a/src/extent.c b/src/extent.c index e8a1257..3db82a7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1342,33 +1342,11 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, */ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, a, true, - false, &a_elm_a, &a_elm_b); - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, b, true, - false, &b_elm_a, &b_elm_b); - + emap_split_prepare_t split_prepare; + emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); - if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, a_elm_b, NULL, - SC_NSIZES, false); - } - if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, b_elm_a, NULL, - SC_NSIZES, false); - } else { - b_elm_b = b_elm_a; - } - - edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_szind_set(a, SC_NSIZES); - edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? - edata_sn_get(a) : edata_sn_get(b)); - edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - - emap_rtree_write_acquired(tsdn, &emap_global, a_elm_a, b_elm_b, a, - SC_NSIZES, false); + emap_merge_commit(tsdn, &emap_global, &split_prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); -- cgit v0.12 From 231d1477e5d8dd591d2f51c1c884ac58fc7adb2c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 14:21:48 -0800 Subject: Rename emap_split_prepare_t -> emap_prepare_t. Both the split and merge functions use it. 
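A small, purely illustrative sketch (nothing below is from the commit beyond the type and function names) of the point of the rename: one scratch struct now backs both operations, so a caller can declare a single emap_prepare_t and hand it to either prepare function. The helper emap_prepare_either_sketch is hypothetical; the post-rename signatures are as shown in this patch.

/* Hypothetical helper: one emap_prepare_t serves both split and merge. */
static bool
emap_prepare_either_sketch(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx,
    emap_prepare_t *prepare, bool is_split, edata_t *lead, edata_t *trail,
    size_t size_a, szind_t szind_a, bool slab_a, size_t size_b,
    szind_t szind_b, bool slab_b) {
    if (is_split) {
        /* Fallible: rtree leaves may need to be allocated. */
        return emap_split_prepare(tsdn, &emap_global, rtree_ctx, prepare,
            lead, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b);
    }
    /* Merge prepare only looks up existing leaves and cannot fail. */
    emap_merge_prepare(tsdn, &emap_global, rtree_ctx, prepare, lead, trail);
    return false;
}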
--- include/jemalloc/internal/emap.h | 22 +++++++--------- src/emap.c | 56 +++++++++++++++++++--------------------- src/extent.c | 20 +++++++------- 3 files changed, 45 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7835da9..3be9192 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -70,8 +70,8 @@ void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); -typedef struct emap_split_prepare_s emap_split_prepare_t; -struct emap_split_prepare_s { +typedef struct emap_prepare_s emap_prepare_t; +struct emap_prepare_s { rtree_leaf_elm_t *lead_elm_a; rtree_leaf_elm_t *lead_elm_b; rtree_leaf_elm_t *trail_elm_a; @@ -79,16 +79,14 @@ struct emap_split_prepare_s { }; bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b); -void emap_split_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b); + emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b); void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); -void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); + emap_prepare_t *prepare, edata_t *lead, edata_t *trail); +void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index bde986f..1d41307 100644 --- a/src/emap.c +++ b/src/emap.c @@ -186,9 +186,8 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b) { + emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its own arena ind -- this allows @@ -210,57 +209,54 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, extent_state_active, false, false, false, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, - &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, false, true, - &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + &prepare->trail_elm_a, &prepare->trail_elm_b); - if (split_prepare->lead_elm_a == NULL - || split_prepare->lead_elm_b == NULL - || split_prepare->trail_elm_a == NULL - || 
split_prepare->trail_elm_b == NULL) { + if (prepare->lead_elm_a == NULL || prepare->lead_elm_b == NULL + || prepare->trail_elm_a == NULL || prepare->trail_elm_b == NULL) { return true; } return false; } void -emap_split_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b) { +emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b) { edata_size_set(lead, size_a); edata_szind_set(lead, szind_a); - emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, - split_prepare->lead_elm_b, lead, szind_a, slab_a); - emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, - split_prepare->trail_elm_b, trail, szind_b, slab_b); + emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, + prepare->lead_elm_b, lead, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, + prepare->trail_elm_b, trail, szind_b, slab_b); } void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, - &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, - &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + &prepare->trail_elm_a, &prepare->trail_elm_b); } void -emap_merge_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { - if (split_prepare->lead_elm_b != NULL) { +emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail) { + if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - split_prepare->lead_elm_b, NULL, SC_NSIZES, false); + prepare->lead_elm_b, NULL, SC_NSIZES, false); } rtree_leaf_elm_t *merged_b; - if (split_prepare->trail_elm_b != NULL) { + if (prepare->trail_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - split_prepare->trail_elm_a, NULL, SC_NSIZES, false); - merged_b = split_prepare->trail_elm_b; + prepare->trail_elm_a, NULL, SC_NSIZES, false); + merged_b = prepare->trail_elm_b; } else { - merged_b = split_prepare->trail_elm_a; + merged_b = prepare->trail_elm_a; } edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); @@ -270,6 +266,6 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, edata_zeroed_set(lead, edata_zeroed_get(lead) && edata_zeroed_get(trail)); - emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, - merged_b, lead, SC_NSIZES, false); + emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, + lead, SC_NSIZES, false); } diff --git a/src/extent.c b/src/extent.c index 3db82a7..2c8813d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1278,10 +1278,9 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_split_prepare_t split_prepare; - bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, - &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, - szind_b, slab_b); + emap_prepare_t prepare; + bool err = 
emap_split_prepare(tsdn, &emap_global, rtree_ctx, &prepare, + edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } @@ -1295,8 +1294,8 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_c; } - emap_split_commit(tsdn, &emap_global, &split_prepare, edata, size_a, - szind_a, slab_a, trail, size_b, szind_b, slab_b); + emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, + slab_a, trail, size_b, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); @@ -1342,12 +1341,11 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, */ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_split_prepare_t split_prepare; - emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, a, b); - emap_lock_edata2(tsdn, &emap_global, a, b); - - emap_merge_commit(tsdn, &emap_global, &split_prepare, a, b); + emap_prepare_t prepare; + emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &prepare, a, b); + emap_lock_edata2(tsdn, &emap_global, a, b); + emap_merge_commit(tsdn, &emap_global, &prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); edata_cache_put(tsdn, edata_cache, b); -- cgit v0.12 From 08eb1e6c3164b90cebe0f28bb07c0586a74f3c9e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 12:16:30 -0800 Subject: Emap: Comments and cleanup Document some of the public interface, and hide the functions that are no longer used outside of the emap module. --- include/jemalloc/internal/emap.h | 33 ++++++++++++++++++++++----------- src/emap.c | 4 ++-- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3be9192..99ebfd8 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -15,26 +15,21 @@ extern emap_t emap_global; bool emap_init(emap_t *emap); +/* + * Grab the lock or locks associated with the edata or edatas indicated (which + * is done just by simple address hashing). The hashing strategy means that + * it's never safe to grab locks incrementally -- you have to grab all the locks + * you'll need at once, and release them all at once. + */ void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); - void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); - edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); -bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, - bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); - -/* Only temporarily public; this will be internal eventually. */ -void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, - rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, - szind_t szind, bool slab); - /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. @@ -78,6 +73,22 @@ struct emap_prepare_s { rtree_leaf_elm_t *trail_elm_b; }; +/** + * These functions do some of the metadata management for merging and splitting + * extents. In particular, they set the mappings from addresses to edatas and + * fill in lead and trail. 
+ * + * Each operation has a "prepare" and a "commit" portion. The prepare portion + * does the operations that can be done without exclusive access to the extent + * in question, while the commit variant requires exclusive access to maintain + * the emap invariants. The only function that can fail is emap_split_prepare, + * and it returns true on failure (at which point the caller shouldn't commit). + * + * In all cases, "lead" refers to the lower-addressed extent, and trail to the + * higher-addressed one. Trail can contain garbage (except for its arena_ind + * and esn values) data for the split variants, and can be reused for any + * purpose by its given arena after a merge or a failed split. + */ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/emap.c b/src/emap.c index 1d41307..9ff51e3 100644 --- a/src/emap.c +++ b/src/emap.c @@ -105,7 +105,7 @@ emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return ret; } -bool +static bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { @@ -126,7 +126,7 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return false; } -void +static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); -- cgit v0.12 From 1d449bd9a6aca25f3cdfc58545f4857f52f36b12 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 13:51:05 -0800 Subject: Emap: Internal rtree context setting. The only time sharing an rtree context saves across extent operations isn't a no-op is when tsd is unavailable. But this happens only in situations like thread death or initialization, and we don't care about shaving off every possible cycle in such scenarios. --- include/jemalloc/internal/emap.h | 48 +++++++----- src/emap.c | 73 +++++++++++++++---- src/extent.c | 153 +++++++++++++-------------------------- 3 files changed, 139 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 99ebfd8..7016394 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -27,16 +27,16 @@ void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); -edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); +edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, + bool inactive_only); /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. * Returns true on error (i.e. resource exhaustion). 
*/ -bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind, bool slab); +bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind, bool slab); /* * Does the same thing, but with the interior of the range, for slab @@ -57,13 +57,11 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, * touched, so no allocation is necessary to fill the interior once the boundary * has been touched. */ -void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind); +void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind); -void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, edata_t *edata); -void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, edata_t *edata); +void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata); typedef struct emap_prepare_s emap_prepare_t; struct emap_prepare_s { @@ -74,9 +72,12 @@ struct emap_prepare_s { }; /** - * These functions do some of the metadata management for merging and splitting - * extents. In particular, they set the mappings from addresses to edatas and - * fill in lead and trail. + * These functions do some of the metadata management for merging, splitting, + * and reusing extents. In particular, they set the boundary mappings from + * addresses to edatas and fill in the szind, size, and slab values for the + * output edata (and, for splitting, *all* values for the trail). If the result + * is going to be used as a slab, you still need to call emap_register_interior + * on it, though. * * Each operation has a "prepare" and a "commit" portion. The prepare portion * does the operations that can be done without exclusive access to the extent @@ -89,15 +90,26 @@ struct emap_prepare_s { * and esn values) data for the split variants, and can be reused for any * purpose by its given arena after a merge or a failed split. */ -bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, + szind_t szind, bool slab); +bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); -void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *lead, edata_t *trail); +void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail); void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail); +/* Assert that the emap's view of the given edata matches the edata's view. 
*/ +void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +static inline void +emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (config_debug) { + emap_do_assert_mapped(tsdn, emap, edata); + } +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 9ff51e3..5b7d4e1 100644 --- a/src/emap.c +++ b/src/emap.c @@ -5,6 +5,15 @@ emap_t emap_global; +/* + * Note: Ends without at semicolon, so that + * EMAP_DECLARE_RTREE_CTX; + * in uses will avoid empty-statement warnings. + */ +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) + enum emap_lock_result_e { emap_lock_result_success, emap_lock_result_failure, @@ -89,8 +98,9 @@ emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, * address, and NULL otherwise. */ edata_t * -emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - void *addr, bool inactive_only) { +emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, + bool inactive_only) { + EMAP_DECLARE_RTREE_CTX; edata_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)addr, false, false); @@ -137,8 +147,10 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, } bool -emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind, bool slab) { +emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind, bool slab) { + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm_a, *elm_b; bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, false, true, &elm_a, &elm_b); @@ -150,8 +162,10 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind) { +emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind) { + EMAP_DECLARE_RTREE_CTX; + assert(edata_slab_get(edata)); /* Register interior. 
*/ @@ -163,8 +177,8 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata) { +emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, @@ -174,8 +188,9 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata) { +emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; + assert(edata_slab_get(edata)); for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_clear(tsdn, &emap->rtree, rtree_ctx, @@ -184,10 +199,29 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } } +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, + szind_t szind, bool slab) { + EMAP_DECLARE_RTREE_CTX; + + edata_szind_set(edata, szind); + if (szind != SC_NSIZES) { + rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_addr_get(edata), szind, slab); + if (slab && edata_size_get(edata) > PAGE) { + rtree_szind_slab_update(tsdn, + &emap->rtree, rtree_ctx, + (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE, + szind, slab); + } + } + +} + bool -emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { +emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b) { + EMAP_DECLARE_RTREE_CTX; /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its own arena ind -- this allows @@ -234,8 +268,9 @@ emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, } void -emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { +emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail) { + EMAP_DECLARE_RTREE_CTX; emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, @@ -269,3 +304,11 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } + +void +emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; + + assert(rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), true) == edata); +} diff --git a/src/extent.c b/src/extent.c index 2c8813d..7c79ced 100644 --- a/src/extent.c +++ b/src/extent.c @@ -43,8 +43,8 @@ static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained); + ehooks_t 
*ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, @@ -81,12 +81,11 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, - edata_t *edata) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, &coalesced, false); + edata = extent_try_coalesce(tsdn, edata_cache, ehooks, ecache, edata, + &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -160,9 +159,6 @@ ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -188,7 +184,7 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, - ehooks, rtree_ctx, ecache, edata)) { + ehooks, ecache, edata)) { break; } /* @@ -309,9 +305,6 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { static bool extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
@@ -321,15 +314,13 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - if (emap_register_boundary(tsdn, &emap_global, rtree_ctx, edata, szind, - slab)) { + if (emap_register_boundary(tsdn, &emap_global, edata, szind, slab)) { emap_unlock_edata(tsdn, &emap_global, edata); return true; } if (slab) { - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -362,13 +353,10 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { */ static void extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_lock_edata(tsdn, &emap_global, edata); - emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_boundary(tsdn, &emap_global, edata); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, edata); edata_slab_set(edata, false); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -394,8 +382,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -420,8 +408,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, &emap_global, rtree_ctx, - new_addr, false); + edata = emap_lock_edata_from_addr(tsdn, &emap_global, new_addr, + false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -480,7 +468,6 @@ typedef enum { static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -529,22 +516,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { - /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. 
- */ - edata_szind_set(*edata, szind); - if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)edata_addr_get(*edata), szind, - slab); - if (slab && edata_size_get(*edata) > PAGE) { - rtree_szind_slab_update(tsdn, - &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_past_get(*edata) - - (uintptr_t)PAGE, szind, slab); - } - } + emap_remap(tsdn, &emap_global, *edata, size, szind, slab); } return extent_split_interior_ok; @@ -558,18 +530,16 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, - bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t *to_leak; edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, - &to_salvage, new_addr, size, pad, alignment, slab, szind, - growing_retained); + tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + new_addr, size, pad, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -605,7 +575,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); assert(emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, leak, false) == NULL); + leak, false) == NULL); } return NULL; } @@ -626,19 +596,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(pad == 0 || !slab); assert(!*zero || !slab); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, ecache, new_addr, size, pad, alignment, slab, - growing_retained); + edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, + new_addr, size, pad, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, - new_addr, size, pad, alignment, slab, szind, edata, - growing_retained); + edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, + size, pad, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -665,8 +630,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } if (*zero) { @@ -724,14 +688,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed); - edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } + edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), 
extent_state_active, zeroed, + committed, true, EXTENT_IS_HEAD); + if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; @@ -744,15 +709,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, *commit = true; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - edata_t *lead; edata_t *trail; edata_t *to_leak; edata_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, + arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { @@ -824,13 +787,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_randomize(tsdn, arena, edata, alignment); } if (slab) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - edata_slab_set(edata, true); - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); @@ -949,8 +907,8 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained, bool inactive_only) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -966,7 +924,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Try to coalesce forward. */ edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, edata_past_get(edata), inactive_only); + edata_past_get(edata), inactive_only); if (next != NULL) { /* * ecache->mtx only protects against races for @@ -992,7 +950,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Try to coalesce backward. 
*/ edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, edata_before_get(edata), inactive_only); + edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); @@ -1020,18 +978,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, static edata_t * extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, growing_retained, false); + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, + edata, coalesced, growing_retained, false); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, growing_retained, true); + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, + edata, coalesced, growing_retained, true); } /* @@ -1041,9 +998,6 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1052,16 +1006,15 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, edata); edata_slab_set(edata, false); } - assert(rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), true) == edata); + emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, - rtree_ctx, ecache, edata, NULL, growing_retained); + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ @@ -1069,8 +1022,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->edata_cache, ehooks, rtree_ctx, ecache, - edata, &coalesced, growing_retained); + &arena->edata_cache, ehooks, ecache, edata, + &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && arena_may_force_decay(arena)) { @@ -1276,11 +1229,9 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, &prepare, - edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); + bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, + size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } @@ -1339,10 +1290,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, * owned, so the following code uses decomposed helper functions rather * than extent_{,de}register() to do things in the right order. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); emap_prepare_t prepare; - emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &prepare, a, b); + emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); emap_merge_commit(tsdn, &emap_global, &prepare, a, b); -- cgit v0.12 From 9b5d105fc36e719869f3e113d0d2dc16cf24a60c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 14:50:34 -0800 Subject: Emap: Move in iealloc. This is logically scoped to the emap. --- include/jemalloc/internal/arena_inlines_b.h | 22 +++++++++++++--------- include/jemalloc/internal/emap.h | 9 +++++++++ .../internal/jemalloc_internal_inlines_b.h | 11 ----------- src/arena.c | 8 ++++---- src/ctl.c | 2 +- src/ehooks.c | 8 ++++---- src/inspect.c | 4 ++-- src/jemalloc.c | 4 ++-- src/large.c | 2 +- src/prof.c | 2 +- src/tcache.c | 6 ++++-- test/unit/binshard.c | 4 ++-- test/unit/prof_recent.c | 2 +- 13 files changed, 44 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b39578c..7947813 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -47,10 +48,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); } if (unlikely(!is_slab)) { @@ -73,13 +74,15 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_reset(iealloc(tsd_tsdn(tsd), ptr)); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), + &emap_global, ptr); + large_prof_tctx_reset(edata); } } } @@ -89,7 +92,7 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -177,8 +180,9 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get( - iealloc(tsdn, ptr))], ATOMIC_RELAXED); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + unsigned arena_ind = edata_arena_ind_get(edata); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE size_t @@ -233,7 +237,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } @@ -277,7 +281,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7016394..a6aadbc 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -112,4 +112,13 @@ emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +JEMALLOC_ALWAYS_INLINE edata_t * +emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, + true); +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 00fb604..fc526c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,9 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H -#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. 
*/ static inline arena_t * @@ -77,13 +75,4 @@ arena_is_auto(arena_t *arena) { return (arena_ind_get(arena) < manual_arena_base); } -JEMALLOC_ALWAYS_INLINE edata_t * -iealloc(tsdn_t *tsdn, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - return rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); -} - #endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/src/arena.c b/src/arena.c index 3206a9a..f7f3ee5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1637,7 +1637,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1769,7 +1769,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1783,7 +1783,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1817,7 +1817,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == iealloc(tsdn, ptr)); + assert(edata == emap_lookup(tsdn, &emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; diff --git a/src/ctl.c b/src/ctl.c index 29909df..3f30ef0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2667,7 +2667,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index 5ea73e3..13d9ab0 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = iealloc(tsdn, addr_a); - edata_t *b = iealloc(tsdn, addr_b); + edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); + edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = iealloc(tsdn, addr_a); + edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = iealloc(tsdn, addr_b); + edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/inspect.c b/src/inspect.c index 5ad23a0..1be3429 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { 
assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = iealloc(tsdn, ptr); + const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = iealloc(tsdn, ptr); + const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 8f34989..2b4cd27 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3423,7 +3423,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). */ - edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -3462,7 +3462,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). */ - assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); + assert(emap_lookup(tsd_tsdn(tsd), &emap_global, ptr) == old_edata); if (unlikely(usize == old_usize)) { te_alloc_rollback(tsd, usize); diff --git a/src/large.c b/src/large.c index 2e52098..d393c43 100644 --- a/src/large.c +++ b/src/large.c @@ -272,7 +272,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ diff --git a/src/prof.c b/src/prof.c index 248532e..7b57dd2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -148,7 +148,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index 9146f24..e9331d0 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -160,7 +160,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsdn, *(bottom_item - i)); + item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i)); } } @@ -258,7 +259,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up edata once per item. 
*/ for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); + item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d9a0d59..d5f43df 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = iealloc(tsdn, ptr); + edata = emap_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = iealloc(tsdn, ptr2); + edata = emap_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 3c10618..a8761ca 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,7 +101,7 @@ TEST_END static void confirm_malloc(tsd_t *tsd, void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); - edata_t *e = iealloc(TSDN_NULL, p); + edata_t *e = emap_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); -- cgit v0.12 From 65a54d771467df1d2144ae3da9ebf4ae2388bd4d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 17:17:15 -0800 Subject: Emap: Move in szind and slab modifications. --- include/jemalloc/internal/emap.h | 12 +++++++++--- src/arena.c | 16 +++------------- src/emap.c | 16 ++++++++++++++-- src/extent.c | 2 +- src/large.c | 9 +++------ 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index a6aadbc..9e3b415 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -79,7 +79,13 @@ struct emap_prepare_s { * is going to be used as a slab, you still need to call emap_register_interior * on it, though. * - * Each operation has a "prepare" and a "commit" portion. The prepare portion + * Remap simply changes the szind and slab status of an extent's boundary + * mappings. If the extent is not a slab, it doesn't bother with updating the + * end mapping (since lookups only occur in the interior of an extent for + * slabs). Since the szind and slab status only make sense for active extents, + * this should only be called while activating or deactivating an extent. + * + * Split and merge have a "prepare" and a "commit" portion. The prepare portion * does the operations that can be done without exclusive access to the extent * in question, while the commit variant requires exclusive access to maintain * the emap invariants. The only function that can fail is emap_split_prepare, @@ -90,8 +96,8 @@ struct emap_prepare_s { * and esn values) data for the split variants, and can be reused for any * purpose by its given arena after a merge or a failed split. 
*/ -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, - szind_t szind, bool slab); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, + bool slab); bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/arena.c b/src/arena.c index f7f3ee5..2df7df6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1599,16 +1599,10 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, szind, false); + emap_remap(tsdn, &emap_global, edata, szind, false); prof_idump_rollback(tsdn, usize); @@ -1620,11 +1614,7 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_szind_set(edata, SC_NBINS); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, SC_NBINS, false); + emap_remap(tsdn, &emap_global, edata, SC_NBINS, false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c index 5b7d4e1..ae0d312 100644 --- a/src/emap.c +++ b/src/emap.c @@ -199,14 +199,26 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, - szind_t szind, bool slab) { +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, + bool slab) { EMAP_DECLARE_RTREE_CTX; edata_szind_set(edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, slab); + /* + * Recall that this is called only for active->inactive and + * inactive->active transitions (since only active extents have + * meaningful values for szind and slab). Active, non-slab + * extents only need to handle lookups at their head (on + * deallocation), so we don't bother filling in the end + * boundary. + * + * For slab extents, we do the end-mapping change. This still + * leaves the interior unmodified; an emap_register_interior + * call is coming in those cases, though. 
+ */ if (slab && edata_size_get(edata) > PAGE) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, diff --git a/src/extent.c b/src/extent.c index 7c79ced..d06b8d6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -516,7 +516,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { - emap_remap(tsdn, &emap_global, *edata, size, szind, slab); + emap_remap(tsdn, &emap_global, *edata, szind, slab); } return extent_split_interior_ok; diff --git a/src/large.c b/src/large.c index d393c43..3965c5e 100644 --- a/src/large.c +++ b/src/large.c @@ -3,10 +3,10 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_recent.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -175,12 +175,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_addr_get(edata), szind, false); + emap_remap(tsdn, &emap_global, edata, szind, false); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); -- cgit v0.12 From f7d9c6c42d51af2a06048e64b1a35a39c143eb4a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 18:58:19 -0800 Subject: Emap: Move in alloc_ctx lookup functionality. --- include/jemalloc/internal/arena_structs.h | 6 ---- include/jemalloc/internal/arena_types.h | 1 - include/jemalloc/internal/emap.h | 18 ++++++++++ src/jemalloc.c | 56 ++++++++++++++----------------- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 2d5c568..fde540a 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -197,10 +197,4 @@ struct arena_tdata_s { ticker_t decay_ticker; }; -/* Used to pass rtree lookup context down the path. */ -struct alloc_ctx_s { - szind_t szind; - bool slab; -}; - #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 369dff0..b13d8a0 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,7 +12,6 @@ typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; -typedef struct alloc_ctx_s alloc_ctx_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. */ diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9e3b415..4588daf 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -11,6 +11,13 @@ struct emap_s { mutex_pool_t mtx_pool; }; +/* Used to pass rtree lookup context down the path. 
*/ +typedef struct alloc_ctx_t alloc_ctx_t; +struct alloc_ctx_t { + szind_t szind; + bool slab; +}; + extern emap_t emap_global; bool emap_init(emap_t *emap); @@ -127,4 +134,15 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { true); } +/* Fills in alloc_ctx with the info in the map. */ +JEMALLOC_ALWAYS_INLINE void +emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, + alloc_ctx_t *alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, + true, &alloc_ctx->szind, &alloc_ctx->slab); +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2b4cd27..d0af5da 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2568,9 +2568,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2601,57 +2599,55 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx, *ctx; + alloc_ctx_t alloc_ctx; if (!config_prof) { - /* Means usize will be used to determine szind. */ - ctx = NULL; + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else { if (likely(!prof_sample_aligned(ptr))) { - ctx = &alloc_ctx; /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - ctx->szind = sz_size2index(usize); + alloc_ctx.szind = sz_size2index(usize); if (config_cache_oblivious) { - ctx->slab = (ctx->szind < SC_NBINS); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else { /* Non page aligned must be slab allocated. */ - ctx->slab = true; + alloc_ctx.slab = true; } if (config_debug) { alloc_ctx_t dbg_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), - &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &dbg_ctx.szind, - &dbg_ctx.slab); - assert(dbg_ctx.szind == ctx->szind); - assert(dbg_ctx.slab == ctx->slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), + &emap_global, ptr, &dbg_ctx); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - ctx = &alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, - &ctx->slab); - /* Small alloc may have !slab (sampled). */ - bool sz_correct = (ctx->szind == sz_size2index(usize)); - if (config_opt_safety_checks && !sz_correct) { - safety_check_fail_sized_dealloc(true); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + ptr, &alloc_ctx); + + if (config_opt_safety_checks) { + /* Small alloc may have !slab (sampled). 
*/ + if (alloc_ctx.szind != sz_size2index(usize)) { + safety_check_fail_sized_dealloc(true); + } } } else { - ctx = NULL; + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } } if (config_prof && opt_prof) { - prof_free(tsd, ptr, usize, ctx); + prof_free(tsd, ptr, usize, &alloc_ctx); } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, + false); } else { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, + true); } thread_dalloc_event(tsd, usize); } -- cgit v0.12 From 06e42090f7ff42d944dbf318dd24eeac43e59255 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 10:59:48 -0800 Subject: Make jemalloc.c use the emap interface. While we're here, we'll also clean up some style nits. --- include/jemalloc/internal/emap.h | 15 +++++++++++++++ src/jemalloc.c | 33 ++++++++++++++------------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 4588daf..3a8182d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -145,4 +145,19 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, true, &alloc_ctx->szind, &alloc_ctx->slab); } +/* + * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the + * L1 rtree cache. + * + * Returns whether or not alloc_ctx was filled in. + */ +JEMALLOC_ALWAYS_INLINE bool +emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, void *ptr, + alloc_ctx_t *alloc_ctx) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, + rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); + return res; +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index d0af5da..90a948c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2688,26 +2688,24 @@ free_default(void *ptr) { } } +/* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - szind_t szind; + alloc_ctx_t alloc_ctx; if (!size_hint) { - if (unlikely(!tsd || !tsd_fast(tsd))) { + if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool slab; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, &szind, - &slab); + bool res = emap_alloc_info_try_lookup_fast(tsd, &emap_global, + ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !slab)) { + if (unlikely(!res || !alloc_ctx.slab)) { return false; } - assert(szind != SC_NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); } else { /* * The size hinted fastpath does not involve rtree lookup, thus @@ -2715,7 +2713,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * check to be folded into the branch testing fast_threshold * (set to 0 when !tsd_fast). 
*/ - if (unlikely(!tsd)) { + if (unlikely(tsd == NULL)) { return false; } /* @@ -2727,12 +2725,13 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { (config_prof && prof_sample_aligned(ptr)))) { return false; } - szind = sz_size2index_lookup(size); + alloc_ctx.szind = sz_size2index_lookup(size); + alloc_ctx.slab = false; } uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); - size_t usize = sz_index2size(szind); + size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to @@ -2743,7 +2742,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, szind); + cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } @@ -3143,9 +3142,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3422,9 +3419,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); -- cgit v0.12 From ac50c1e44b1a34b27ca72ada25a65d685253e2c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 13:16:07 -0800 Subject: Emap: Remove direct access to emap internals. In the process, we do a few local cleanups and optimizations. In particular, the size safety check on tcache flush no longer does a redundant load. 
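
For reference, the calling convention this change settles on looks roughly
like the sketch below. This is an illustrative helper only, not part of the
patch: the name example_usize_lookup is made up, and it assumes the
jemalloc-internal headers that declare alloc_ctx_t, emap_alloc_info_lookup()
and emap_lookup() (as named at this point in the series; a later patch
renames them). The point is that callers now go through the emap interface
instead of reading emap_global.rtree directly:

    static size_t
    example_usize_lookup(tsdn_t *tsdn, const void *ptr) {
            /* Read the cached szind/slab bits through the emap API. */
            alloc_ctx_t alloc_ctx;
            emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx);
            assert(alloc_ctx.szind != SC_NSIZES);

            if (config_debug) {
                    /* The owning edata must agree with the cached bits. */
                    edata_t *edata = emap_lookup(tsdn, &emap_global, ptr);
                    assert(alloc_ctx.szind == edata_szind_get(edata));
                    assert(alloc_ctx.slab == edata_slab_get(edata));
            }

            return sz_index2size(alloc_ctx.szind);
    }
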
--- include/jemalloc/internal/arena_inlines_b.h | 168 +++++++++++----------------- include/jemalloc/internal/emap.h | 39 ++++++- include/jemalloc/internal/rtree.h | 13 ++- include/jemalloc/internal/util.h | 7 ++ src/arena.c | 6 +- src/tcache.c | 24 ++-- test/unit/arena_reset.c | 19 ++-- 7 files changed, 144 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7947813..eb82e71 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -188,15 +188,11 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); + alloc_ctx_t alloc_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + assert(alloc_ctx.szind != SC_NSIZES); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - szind_t szind = rtree_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); - assert(szind != SC_NSIZES); - - return sz_index2size(szind); + return sz_index2size(alloc_ctx.szind); } JEMALLOC_ALWAYS_INLINE size_t @@ -210,26 +206,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { * failure. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata; - szind_t szind; - if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, false, &edata, &szind)) { + emap_full_alloc_ctx_t full_alloc_ctx; + bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, ptr, + &full_alloc_ctx); + if (missing) { return 0; } - if (edata == NULL) { + if (full_alloc_ctx.edata == NULL) { return 0; } - assert(edata_state_get(edata) == extent_state_active); + assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active); /* Only slab members should be looked up via interior pointers. */ - assert(edata_addr_get(edata) == ptr || edata_slab_get(edata)); + assert(edata_addr_get(full_alloc_ctx.edata) == ptr + || edata_slab_get(full_alloc_ctx.edata)); - assert(szind != SC_NSIZES); + assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(szind); + return sz_index2size(full_alloc_ctx.szind); } static inline void @@ -246,27 +240,21 @@ static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - szind_t szind; - bool slab; - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); + alloc_ctx_t alloc_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(szind < SC_NSIZES); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); } } @@ -288,7 +276,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -297,34 +285,28 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, return; } - szind_t szind; - bool slab; - rtree_ctx_t *rtree_ctx; - if (alloc_ctx != NULL) { - szind = alloc_ctx->szind; - slab = alloc_ctx->slab; - assert(szind != SC_NSIZES); + alloc_ctx_t alloc_ctx; + if (caller_alloc_ctx != NULL) { + alloc_ctx = *caller_alloc_ctx; } else { - rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); + util_assume(!tsdn_null(tsdn)); + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); } if (config_debug) { - rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(szind < SC_NSIZES); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. */ - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + alloc_ctx.szind, slow_path); } else { - arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); + arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + slow_path); } } @@ -333,47 +315,41 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); - szind_t szind; - bool slab; + alloc_ctx_t alloc_ctx; if (!config_prof || !opt_prof) { /* * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - szind = sz_size2index(size); - slab = (szind < SC_NBINS); + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } if ((config_prof && opt_prof) || config_debug) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); - assert(szind == sz_size2index(size)); - assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); + assert(alloc_ctx.szind == sz_size2index(size)); + assert((config_prof && opt_prof) + || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); } } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); } } JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); @@ -383,48 +359,38 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, return; } - szind_t szind; - bool slab; - alloc_ctx_t local_ctx; + alloc_ctx_t alloc_ctx; if (config_prof && opt_prof) { - if (alloc_ctx == NULL) { + if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &local_ctx.szind, - &local_ctx.slab); - assert(local_ctx.szind == sz_size2index(size)); - alloc_ctx = &local_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, + &alloc_ctx); + assert(alloc_ctx.szind == sz_size2index(size)); + } else { + alloc_ctx = *caller_alloc_ctx; } - slab = alloc_ctx->slab; - szind = alloc_ctx->szind; } else { /* * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - szind = sz_size2index(size); - slab = (szind < SC_NBINS); + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } if (config_debug) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); - edata_t *edata = rtree_edata_read(tsdn, - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. */ - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + alloc_ctx.szind, slow_path); } else { - arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); + arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + slow_path); } } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3a8182d..89bb968 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -18,6 +18,13 @@ struct alloc_ctx_t { bool slab; }; +typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t; +struct emap_full_alloc_ctx_s { + szind_t szind; + bool slab; + edata_t *edata; +}; + extern emap_t emap_global; bool emap_init(emap_t *emap); @@ -136,7 +143,7 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void -emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, +emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, alloc_ctx_t *alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -145,6 +152,34 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, true, &alloc_ctx->szind, &alloc_ctx->slab); } +/* The pointer must be mapped. 
*/ +JEMALLOC_ALWAYS_INLINE void +emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_full_alloc_ctx_t *full_alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, true, &full_alloc_ctx->edata, + &full_alloc_ctx->szind, &full_alloc_ctx->slab); +} + +/* + * The pointer is allowed to not be mapped. + * + * Returns true when the pointer is not present. + */ +JEMALLOC_ALWAYS_INLINE bool +emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_full_alloc_ctx_t *full_alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, false, &full_alloc_ctx->edata, + &full_alloc_ctx->szind, &full_alloc_ctx->slab); +} + /* * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the * L1 rtree cache. @@ -152,7 +187,7 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, * Returns whether or not alloc_ctx was filled in. */ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, void *ptr, +emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 339c7e5..11a52ed 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -440,15 +440,24 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, */ JEMALLOC_ALWAYS_INLINE bool -rtree_edata_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, edata_t **r_edata, szind_t *r_szind) { +rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, edata_t **r_edata, + szind_t *r_szind, bool *r_slab) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return true; } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_edata = rtree_leaf_elm_bits_edata_get(bits); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); +#endif return false; } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 304cb54..cb75147 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -62,6 +62,13 @@ get_errno(void) { #endif } +JEMALLOC_ALWAYS_INLINE void +util_assume(bool b) { + if (!b) { + unreachable(); + } +} + #undef UTIL_INLINE #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/arena.c b/src/arena.c index 2df7df6..b2a0ac7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1111,10 +1111,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, - &alloc_ctx.slab); 
+ emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { diff --git a/src/tcache.c b/src/tcache.c index e9331d0..b7c0a54 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -114,10 +114,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, - size_t nflush, edata_t **edatas){ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); +tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, + size_t nflush, edata_t **edatas) { + /* Avoids null-checking tsdn in the loop below. */ + util_assume(tsd != NULL); /* * Verify that the items in the tcache all have the correct size; this @@ -125,16 +125,16 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, * instead of corrupting metadata. Since this can be turned on for opt * builds, avoid the branch in the loop. */ - szind_t szind; - size_t sz_sum = binind * nflush; + size_t szind_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_edata_szind_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)*(bottom_item - i), true, - &edatas[i], &szind); - sz_sum -= szind; + emap_full_alloc_ctx_t full_alloc_ctx; + emap_full_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i), &full_alloc_ctx); + edatas[i] = full_alloc_ctx.edata; + szind_sum -= full_alloc_ctx.szind; } - if (sz_sum != 0) { + if (szind_sum != 0) { safety_check_fail_sized_dealloc(false); } } @@ -156,7 +156,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, tsdn_t *tsdn = tsd_tsdn(tsd); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsdn, tbin, binind, nflush, + tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a1f1d07..7fbde0b 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -60,28 +60,25 @@ get_large_size(size_t ind) { /* Like ivsalloc(), but safe to call on discarded allocations. */ static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata; - szind_t szind; - if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, false, &edata, &szind)) { + emap_full_alloc_ctx_t full_alloc_ctx; + bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, + ptr, &full_alloc_ctx); + if (missing) { return 0; } - if (edata == NULL) { + if (full_alloc_ctx.edata == NULL) { return 0; } - if (edata_state_get(edata) != extent_state_active) { + if (edata_state_get(full_alloc_ctx.edata) != extent_state_active) { return 0; } - if (szind == SC_NSIZES) { + if (full_alloc_ctx.szind == SC_NSIZES) { return 0; } - return sz_index2size(szind); + return sz_index2size(full_alloc_ctx.szind); } static unsigned -- cgit v0.12 From 7e6c8a72869d00e641404e962a830d635a3cd825 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 13:45:04 -0800 Subject: Emap: Standardize naming. 
Namespace everything under emap_, always specify what it is we're looking up (emap_lookup -> emap_edata_lookup), and use "ctx" over "info". --- include/jemalloc/internal/arena_inlines_b.h | 57 ++++++++++++---------- include/jemalloc/internal/emap.h | 18 +++---- .../internal/jemalloc_internal_inlines_c.h | 6 +-- include/jemalloc/internal/prof_inlines_b.h | 11 +++-- src/arena.c | 14 +++--- src/ctl.c | 2 +- src/ehooks.c | 8 +-- src/inspect.c | 4 +- src/jemalloc.c | 37 +++++++------- src/large.c | 2 +- src/prof.c | 2 +- src/tcache.c | 8 +-- test/unit/arena_reset.c | 4 +- test/unit/binshard.c | 4 +- test/unit/prof_recent.c | 2 +- 15 files changed, 93 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index eb82e71..e7f7b85 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -37,7 +37,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, +arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info, bool reset_recent) { cassert(config_prof); assert(ptr != NULL); @@ -48,10 +48,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); } if (unlikely(!is_slab)) { @@ -68,19 +68,21 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, + ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); large_prof_tctx_reset(edata); } @@ -92,7 +94,7 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -180,7 +182,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); unsigned arena_ind = edata_arena_ind_get(edata); return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } @@ -188,8 +190,8 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); return sz_index2size(alloc_ctx.szind); @@ -207,7 +209,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, ptr, + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; @@ -231,7 +233,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } @@ -240,11 +242,11 @@ static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -269,14 +271,14 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -285,16 +287,16 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, return; } - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (caller_alloc_ctx != NULL) { alloc_ctx = *caller_alloc_ctx; } else { 
util_assume(!tsdn_null(tsdn)); - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -315,7 +317,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!config_prof || !opt_prof) { /* * There is no risk of being confused by a promoted sampled @@ -326,14 +328,15 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -349,7 +352,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); @@ -359,11 +362,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, return; } - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ - emap_alloc_info_lookup(tsdn, &emap_global, ptr, + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); } else { @@ -379,7 +382,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 89bb968..c4b4014 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -12,8 +12,8 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. */ -typedef struct alloc_ctx_t alloc_ctx_t; -struct alloc_ctx_t { +typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; +struct emap_alloc_ctx_t { szind_t szind; bool slab; }; @@ -133,7 +133,7 @@ emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } JEMALLOC_ALWAYS_INLINE edata_t * -emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { +emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -143,8 +143,8 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { /* Fills in alloc_ctx with the info in the map. 
*/ JEMALLOC_ALWAYS_INLINE void -emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, - alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -154,7 +154,7 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, /* The pointer must be mapped. */ JEMALLOC_ALWAYS_INLINE void -emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, +emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -170,7 +170,7 @@ emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * Returns true when the pointer is not present. */ JEMALLOC_ALWAYS_INLINE bool -emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, +emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -187,8 +187,8 @@ emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * Returns whether or not alloc_ctx was filled in. */ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, - alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index cdb10eb..0a5ffba 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -101,8 +101,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, - bool is_internal, bool slow_path) { +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); @@ -125,7 +125,7 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *alloc_ctx, bool slow_path) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c53dac5..7e28d83 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -40,7 +40,7 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE void -prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, +prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); @@ -51,7 +51,7 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE 
void prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, - alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { + emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); @@ -60,7 +60,7 @@ prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); @@ -127,7 +127,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { JEMALLOC_ALWAYS_INLINE void prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + emap_alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -214,7 +214,8 @@ prof_sample_aligned(const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { +prof_free(tsd_t *tsd, const void *ptr, size_t usize, + emap_alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); diff --git a/src/arena.c b/src/arena.c index b2a0ac7..aa19e09 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1110,8 +1110,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); @@ -1597,7 +1597,7 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); emap_remap(tsdn, &emap_global, edata, szind, false); @@ -1625,7 +1625,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1757,7 +1757,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1771,7 +1771,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. 
*/ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1805,7 +1805,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == emap_lookup(tsdn, &emap_global, ptr)); + assert(edata == emap_edata_lookup(tsdn, &emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; diff --git a/src/ctl.c b/src/ctl.c index 3f30ef0..3123ab8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2667,7 +2667,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index 13d9ab0..ff459df 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); - edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); + edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); + edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); + edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); + edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/inspect.c b/src/inspect.c index 1be3429..6c4dd8a 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 90a948c..907235a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2170,7 +2170,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( @@ -2567,8 +2567,8 @@ ifree(tsd_t 
*tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2599,7 +2599,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!config_prof) { alloc_ctx.szind = sz_size2index(usize); alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); @@ -2617,14 +2617,14 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx.slab = true; } if (config_debug) { - alloc_ctx_t dbg_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &dbg_ctx); assert(dbg_ctx.szind == alloc_ctx.szind); assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { @@ -2693,12 +2693,12 @@ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_info_try_lookup_fast(tsd, &emap_global, + bool res = emap_alloc_ctx_try_lookup_fast(tsd, &emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ @@ -3069,7 +3069,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { + arena_t *arena, emap_alloc_ctx_t *alloc_ctx, + hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); @@ -3141,8 +3142,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tcache = tcache_get(tsd); } - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3315,7 +3316,7 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { + size_t extra, size_t alignment, bool zero, emap_alloc_ctx_t *alloc_ctx) { /* * old_prof_info is only used for asserting that the profiling info * isn't changed by the ixalloc() call. @@ -3416,10 +3417,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). 
*/ - edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, + ptr); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3453,7 +3455,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). */ - assert(emap_lookup(tsd_tsdn(tsd), &emap_global, ptr) == old_edata); + assert(emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr) + == old_edata); if (unlikely(usize == old_usize)) { te_alloc_rollback(tsd, usize); diff --git a/src/large.c b/src/large.c index 3965c5e..f13b1e5 100644 --- a/src/large.c +++ b/src/large.c @@ -269,7 +269,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ diff --git a/src/prof.c b/src/prof.c index 7b57dd2..49f5a0e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -148,7 +148,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index b7c0a54..33d3cba 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, *(bottom_item - i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; @@ -160,8 +160,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i)); + item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), + &emap_global, *(bottom_item - i)); } } @@ -259,7 +259,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up edata once per item. 
*/ for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, *(bottom_item - i)); } #else diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 7fbde0b..64db058 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, + &full_alloc_ctx); if (missing) { return 0; } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d5f43df..6e10d47 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = emap_lookup(tsdn, &emap_global, ptr); + edata = emap_edata_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = emap_lookup(tsdn, &emap_global, ptr2); + edata = emap_edata_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index a8761ca..962be74 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,7 +101,7 @@ TEST_END static void confirm_malloc(tsd_t *tsd, void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); - edata_t *e = emap_lookup(TSDN_NULL, &emap_global, p); + edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); -- cgit v0.12 From 34b7165fde9622afe75037a2c8862f53269f10bb Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 11:48:42 -0800 Subject: Put szind_t, pszind_t in sz.h. --- include/jemalloc/internal/cache_bin.h | 1 + include/jemalloc/internal/jemalloc_internal_types.h | 6 ------ include/jemalloc/internal/sz.h | 6 ++++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 38b8e32..60feb15 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_CACHE_BIN_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sz.h" /* * The cache_bins are the mechanism that the tcache and the arena use to diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 324a4b1..d8da4de 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -3,12 +3,6 @@ #include "jemalloc/internal/quantum.h" -/* Page size index type. */ -typedef unsigned pszind_t; - -/* Size class index type. */ -typedef unsigned szind_t; - /* Processor / core id type. */ typedef int malloc_cpuid_t; diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 73fb0a4..b094116 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -22,6 +22,12 @@ * size that would result from such an allocation. */ +/* Page size index type. 
*/ +typedef unsigned pszind_t; + +/* Size class index type. */ +typedef unsigned szind_t; + /* * sz_pind2sz_tab encodes the same information as could be computed by * sz_pind2sz_compute(). -- cgit v0.12 From 182192f83c029a794ee3c32767f43e471a00bd26 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 12:00:57 -0800 Subject: Base: Pull into a single header. --- include/jemalloc/internal/base.h | 103 +++++++++++++++++++++ include/jemalloc/internal/base_externs.h | 22 ----- include/jemalloc/internal/base_inlines.h | 13 --- include/jemalloc/internal/base_types.h | 33 ------- include/jemalloc/internal/edata_cache.h | 2 + .../jemalloc/internal/jemalloc_internal_includes.h | 4 - src/base.c | 9 ++ 7 files changed, 114 insertions(+), 72 deletions(-) create mode 100644 include/jemalloc/internal/base.h delete mode 100644 include/jemalloc/internal/base_externs.h delete mode 100644 include/jemalloc/internal/base_inlines.h delete mode 100644 include/jemalloc/internal/base_types.h diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h new file mode 100644 index 0000000..dcac3b6 --- /dev/null +++ b/include/jemalloc/internal/base.h @@ -0,0 +1,103 @@ +#ifndef JEMALLOC_INTERNAL_BASE_H +#define JEMALLOC_INTERNAL_BASE_H + +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/mutex.h" + +enum metadata_thp_mode_e { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. + */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +}; +typedef enum metadata_thp_mode_e metadata_thp_mode_t; + +#define METADATA_THP_DEFAULT metadata_thp_disabled +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; + + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +typedef struct base_block_s base_block_t; +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + edata_t edata; +}; + +typedef struct base_s base_t; +struct base_s { + /* + * User-configurable extent hook functions. + */ + ehooks_t ehooks; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; + /* + * Most recent size class in the series of increasingly large base + * extents. Logarithmic spacing between subsequent allocations ensures + * that the total number of distinct mappings remains small. + */ + pszind_t pind_last; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + edata_heap_t avail[SC_NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; + /* Number of THP regions touched. 
*/ + size_t n_thp; +}; + +static inline unsigned +base_ind_get(const base_t *base) { + return ehooks_ind_get(&base->ehooks); +} + +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(tsdn_t *tsdn, base_t *base); +ehooks_t *base_ehooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped, size_t *n_thp); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_H */ diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h deleted file mode 100644 index 2f24131..0000000 --- a/include/jemalloc/internal/base_externs.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H -#define JEMALLOC_INTERNAL_BASE_EXTERNS_H - -extern metadata_thp_mode_t opt_metadata_thp; -extern const char *metadata_thp_mode_names[]; - -base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(tsdn_t *tsdn, base_t *base); -ehooks_t *base_ehooks_get(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped, size_t *n_thp); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); -bool base_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h deleted file mode 100644 index 221fca8..0000000 --- a/include/jemalloc/internal/base_inlines.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H -#define JEMALLOC_INTERNAL_BASE_INLINES_H - -static inline unsigned -base_ind_get(const base_t *base) { - return ehooks_ind_get(&base->ehooks); -} - -static inline bool -metadata_thp_enabled(void) { - return (opt_metadata_thp != metadata_thp_disabled); -} -#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h deleted file mode 100644 index b6db77d..0000000 --- a/include/jemalloc/internal/base_types.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H -#define JEMALLOC_INTERNAL_BASE_TYPES_H - -typedef struct base_block_s base_block_t; -typedef struct base_s base_t; - -#define METADATA_THP_DEFAULT metadata_thp_disabled - -/* - * In auto mode, arenas switch to huge pages for the base allocator on the - * second base block. a0 switches to thp on the 5th block (after 20 megabytes - * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. 
- */ - -#define BASE_AUTO_THP_THRESHOLD 2 -#define BASE_AUTO_THP_THRESHOLD_A0 5 - -typedef enum { - metadata_thp_disabled = 0, - /* - * Lazily enable hugepage for metadata. To avoid high RSS caused by THP - * + low usage arena (i.e. THP becomes a significant percentage), the - * "auto" option only starts using THP after a base allocator used up - * the first THP region. Starting from the second hugepage (in a single - * arena), "auto" behaves the same as "always", i.e. madvise hugepage - * right away. - */ - metadata_thp_auto = 1, - metadata_thp_always = 2, - metadata_thp_mode_limit = 3 -} metadata_thp_mode_t; - -#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 9cb0d1c..73ac7af 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H #define JEMALLOC_INTERNAL_EDATA_CACHE_H +#include "jemalloc/internal/base.h" + /* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to * the underlying extents they describe). The contents of returned edata_t diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 75a94d3..72b5a72 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/prof_types.h" @@ -51,7 +50,6 @@ #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" @@ -60,7 +58,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" @@ -72,7 +69,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/base_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. diff --git a/src/base.c b/src/base.c index c006774..595b771 100644 --- a/src/base.c +++ b/src/base.c @@ -7,6 +7,15 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + /******************************************************************************/ /* Data. */ -- cgit v0.12 From 7013716aaab806dc6ed2de3437170cdfa2b15a4a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 12:24:09 -0800 Subject: Emap: Take (and propagate) a zeroed parameter. Rtree needs this, and we should really treat them similarly. 
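
In sketch form, the flag now flows from the bootstrap path through emap into
the rtree (the call sites below are taken from the diff that follows):

    /* jemalloc.c: emap_global is static, hence zeroed. */
    emap_init(&emap_global, /* zeroed */ true);

    /* emap.c: emap_init() forwards the flag to its rtree. */
    err = rtree_new(&emap->rtree, zeroed);
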
--- include/jemalloc/internal/emap.h | 2 +- src/emap.c | 4 ++-- src/jemalloc.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index c4b4014..b51a0c5 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -27,7 +27,7 @@ struct emap_full_alloc_ctx_s { extern emap_t emap_global; -bool emap_init(emap_t *emap); +bool emap_init(emap_t *emap, bool zeroed); /* * Grab the lock or locks associated with the edata or edatas indicated (which diff --git a/src/emap.c b/src/emap.c index ae0d312..200a782 100644 --- a/src/emap.c +++ b/src/emap.c @@ -22,9 +22,9 @@ enum emap_lock_result_e { typedef enum emap_lock_result_e emap_lock_result_t; bool -emap_init(emap_t *emap) { +emap_init(emap_t *emap, bool zeroed) { bool err; - err = rtree_new(&emap->rtree, true); + err = rtree_new(&emap->rtree, zeroed); if (err) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 907235a..e2adffd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1572,7 +1572,8 @@ malloc_init_hard_a0_locked() { if (base_boot(TSDN_NULL)) { return true; } - if (emap_init(&emap_global)) { + /* emap_global is static, hence zeroed. */ + if (emap_init(&emap_global, /* zeroed */ true)) { return true; } if (extent_boot()) { -- cgit v0.12 From a0c1f4ac57abe164cecc027efd697a7f1e0e2db4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 13:11:10 -0800 Subject: Rtree: take the base allocator as a parameter. This facilitates better testing by avoiding mixing of the "real" base with the base used by the rtree under test. --- include/jemalloc/internal/base.h | 3 +- include/jemalloc/internal/emap.h | 3 +- include/jemalloc/internal/rtree.h | 17 +------ src/base.c | 6 +-- src/emap.c | 4 +- src/jemalloc.c | 2 +- src/rtree.c | 74 +++------------------------- test/unit/rtree.c | 101 ++++++++++---------------------------- 8 files changed, 45 insertions(+), 165 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index dcac3b6..628e393 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -86,7 +86,8 @@ metadata_thp_enabled(void) { } base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +base_t *base_new(tsdn_t *tsdn, unsigned ind, + const extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b51a0c5..b9f6bc0 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EMAP_H #define JEMALLOC_INTERNAL_EMAP_H +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" @@ -27,7 +28,7 @@ struct emap_full_alloc_ctx_s { extern emap_t emap_global; -bool emap_init(emap_t *emap, bool zeroed); +bool emap_init(emap_t *emap, base_t *base, bool zeroed); /* * Grab the lock or locks associated with the edata or edatas indicated (which diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 11a52ed..094cc1a 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -78,6 +78,7 @@ struct rtree_level_s { typedef struct rtree_s rtree_t; struct rtree_s { + base_t *base; malloc_mutex_t init_lock; /* Number of elements based on 
rtree_levels[0].bits. */ #if RTREE_HEIGHT > 1 @@ -109,22 +110,8 @@ static const rtree_level_t rtree_levels[] = { #endif }; -bool rtree_new(rtree_t *rtree, bool zeroed); +bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed); -typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; - -typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; - -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); -extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; - -typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); -extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; -#ifdef JEMALLOC_JET -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); diff --git a/src/base.c b/src/base.c index 595b771..ebb42da 100644 --- a/src/base.c +++ b/src/base.c @@ -343,7 +343,7 @@ b0get(void) { } base_t * -base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; @@ -353,7 +353,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. */ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, extent_hooks, ind); + ehooks_init(&fake_ehooks, (extent_hooks_t *)extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -366,7 +366,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); - ehooks_init(&base->ehooks, extent_hooks, ind); + ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); diff --git a/src/emap.c b/src/emap.c index 200a782..723dfad 100644 --- a/src/emap.c +++ b/src/emap.c @@ -22,9 +22,9 @@ enum emap_lock_result_e { typedef enum emap_lock_result_e emap_lock_result_t; bool -emap_init(emap_t *emap, bool zeroed) { +emap_init(emap_t *emap, base_t *base, bool zeroed) { bool err; - err = rtree_new(&emap->rtree, zeroed); + err = rtree_new(&emap->rtree, base, zeroed); if (err) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index e2adffd..6dc2e47 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1573,7 +1573,7 @@ malloc_init_hard_a0_locked() { return true; } /* emap_global is static, hence zeroed. */ - if (emap_init(&emap_global, /* zeroed */ true)) { + if (emap_init(&emap_global, b0get(), /* zeroed */ true)) { return true; } if (extent_boot()) { diff --git a/src/rtree.c b/src/rtree.c index 4ae41fe..07a4e9a 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -10,7 +10,7 @@ * used. */ bool -rtree_new(rtree_t *rtree, bool zeroed) { +rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { #ifdef JEMALLOC_JET if (!zeroed) { memset(rtree, 0, sizeof(rtree_t)); /* Clear root. 
*/ @@ -18,6 +18,7 @@ rtree_new(rtree_t *rtree, bool zeroed) { #else assert(zeroed); #endif + rtree->base = base; if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE, malloc_mutex_rank_exclusive)) { @@ -28,75 +29,16 @@ rtree_new(rtree_t *rtree, bool zeroed) { } static rtree_node_elm_t * -rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_node_elm_t), CACHELINE); +rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + return (rtree_node_elm_t *)base_alloc(tsdn, rtree->base, + nelms * sizeof(rtree_node_elm_t), CACHELINE); } -rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl; - -static void -rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { - /* Nodes are never deleted during normal operation. */ - not_reached(); -} -rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = - rtree_node_dalloc_impl; static rtree_leaf_elm_t * -rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_leaf_elm_t), CACHELINE); -} -rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl; - -static void -rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { - /* Leaves are never deleted during normal operation. */ - not_reached(); +rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + return (rtree_leaf_elm_t *)base_alloc(tsdn, rtree->base, + nelms * sizeof(rtree_leaf_elm_t), CACHELINE); } -rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = - rtree_leaf_dalloc_impl; - -#ifdef JEMALLOC_JET -# if RTREE_HEIGHT > 1 -static void -rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree, - unsigned level) { - size_t nchildren = ZU(1) << rtree_levels[level].bits; - if (level + 2 < RTREE_HEIGHT) { - for (size_t i = 0; i < nchildren; i++) { - rtree_node_elm_t *node = - (rtree_node_elm_t *)atomic_load_p(&subtree[i].child, - ATOMIC_RELAXED); - if (node != NULL) { - rtree_delete_subtree(tsdn, rtree, node, level + - 1); - } - } - } else { - for (size_t i = 0; i < nchildren; i++) { - rtree_leaf_elm_t *leaf = - (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child, - ATOMIC_RELAXED); - if (leaf != NULL) { - rtree_leaf_dalloc(tsdn, rtree, leaf); - } - } - } - - if (subtree != rtree->root) { - rtree_node_dalloc(tsdn, rtree, subtree); - } -} -# endif - -void -rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { -# if RTREE_HEIGHT > 1 - rtree_delete_subtree(tsdn, rtree, rtree->root, 0); -# endif -} -#endif static rtree_node_elm_t * rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 2477db0..b5ece82 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -4,80 +4,26 @@ #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) -rtree_node_alloc_t *rtree_node_alloc_orig; -rtree_node_dalloc_t *rtree_node_dalloc_orig; -rtree_leaf_alloc_t *rtree_leaf_alloc_orig; -rtree_leaf_dalloc_t *rtree_leaf_dalloc_orig; - /* Potentially too large to safely place on the stack. 
*/ rtree_t test_rtree; -static rtree_node_elm_t * -rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - rtree_node_elm_t *node; - - if (rtree != &test_rtree) { - return rtree_node_alloc_orig(tsdn, rtree, nelms); - } - - malloc_mutex_unlock(tsdn, &rtree->init_lock); - node = (rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)); - assert_ptr_not_null(node, "Unexpected calloc() failure"); - malloc_mutex_lock(tsdn, &rtree->init_lock); - - return node; -} - -static void -rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, - rtree_node_elm_t *node) { - if (rtree != &test_rtree) { - rtree_node_dalloc_orig(tsdn, rtree, node); - return; - } - - free(node); -} - -static rtree_leaf_elm_t * -rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - rtree_leaf_elm_t *leaf; - - if (rtree != &test_rtree) { - return rtree_leaf_alloc_orig(tsdn, rtree, nelms); - } - - malloc_mutex_unlock(tsdn, &rtree->init_lock); - leaf = (rtree_leaf_elm_t *)calloc(nelms, sizeof(rtree_leaf_elm_t)); - assert_ptr_not_null(leaf, "Unexpected calloc() failure"); - malloc_mutex_lock(tsdn, &rtree->init_lock); - - return leaf; -} - -static void -rtree_leaf_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *leaf) { - if (rtree != &test_rtree) { - rtree_leaf_dalloc_orig(tsdn, rtree, leaf); - return; - } - - free(leaf); -} - TEST_BEGIN(test_rtree_read_empty) { tsdn_t *tsdn; tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_edata_read() should return NULL for empty tree"); - rtree_delete(tsdn, rtree); + + base_delete(tsdn, base); } TEST_END @@ -95,10 +41,14 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, edata_szind_get(&edata_a), edata_slab_get(&edata_a)), @@ -116,12 +66,14 @@ TEST_BEGIN(test_rtree_extrema) { ~((uintptr_t)0), true), &edata_b, "rtree_edata_read() should return previously set value"); - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); } TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; @@ -133,7 +85,8 @@ TEST_BEGIN(test_rtree_bits) { rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], @@ -153,7 +106,7 @@ TEST_BEGIN(test_rtree_bits) { rtree_clear(tsdn, rtree, 
&rtree_ctx, keys[i]); } - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); } TEST_END @@ -162,6 +115,10 @@ TEST_BEGIN(test_rtree_random) { #define SEED 42 sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); + + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + uintptr_t keys[NSET]; rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -171,7 +128,8 @@ TEST_BEGIN(test_rtree_random) { edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); @@ -204,7 +162,7 @@ TEST_BEGIN(test_rtree_random) { "rtree_edata_read() should return previously set value"); } - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); fini_gen_rand(sfmt); #undef NSET #undef SEED @@ -213,15 +171,6 @@ TEST_END int main(void) { - rtree_node_alloc_orig = rtree_node_alloc; - rtree_node_alloc = rtree_node_alloc_intercept; - rtree_node_dalloc_orig = rtree_node_dalloc; - rtree_node_dalloc = rtree_node_dalloc_intercept; - rtree_leaf_alloc_orig = rtree_leaf_alloc; - rtree_leaf_alloc = rtree_leaf_alloc_intercept; - rtree_leaf_dalloc_orig = rtree_leaf_dalloc; - rtree_leaf_dalloc = rtree_leaf_dalloc_intercept; - return test( test_rtree_read_empty, test_rtree_extrema, -- cgit v0.12 From 29436fa056169389f3d76c74aae1465604bdd799 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 14:09:29 -0800 Subject: Break prof and tcache knowledge of b0. --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/tcache_externs.h | 4 ++-- src/ctl.c | 2 +- src/jemalloc.c | 4 ++-- src/prof.c | 21 ++++++++++++--------- src/tcache.c | 16 ++++++++-------- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 36571c8..0b6fecd 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -93,7 +93,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); +bool prof_boot2(tsd_t *tsd, base_t *base); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 2060bb1..db6f98b 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -36,10 +36,10 @@ void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, tcache_t *tcache_create_explicit(tsd_t *tsd); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); +bool tcache_boot(tsdn_t *tsdn, base_t *base); void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index 3123ab8..d149ce6 100644 --- a/src/ctl.c +++ 
b/src/ctl.c @@ -2040,7 +2040,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; READONLY(); - if (tcaches_create(tsd, &tcache_ind)) { + if (tcaches_create(tsd, b0get(), &tcache_ind)) { ret = EFAULT; goto label_return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6dc2e47..4e1d3df 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1586,7 +1586,7 @@ malloc_init_hard_a0_locked() { prof_boot1(); } arena_boot(&sc_data); - if (tcache_boot(TSDN_NULL)) { + if (tcache_boot(TSDN_NULL, b0get())) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, @@ -1823,7 +1823,7 @@ malloc_init_hard(void) { if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } - if (config_prof && prof_boot2(tsd)) { + if (config_prof && prof_boot2(tsd, b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } diff --git a/src/prof.c b/src/prof.c index 49f5a0e..761cb95 100644 --- a/src/prof.c +++ b/src/prof.c @@ -91,6 +91,9 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; +/* The fallback allocator profiling functionality will use. */ +base_t *prof_base; + malloc_mutex_t prof_dump_mtx; static char *prof_dump_prefix = NULL; @@ -584,8 +587,8 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { if (prof_dump_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. */ - char *buffer = base_alloc(tsdn, b0get(), PROF_DUMP_FILENAME_LEN, - QUANTUM); + char *buffer = base_alloc(tsdn, prof_base, + PROF_DUMP_FILENAME_LEN, QUANTUM); if (buffer == NULL) { return true; } @@ -944,7 +947,7 @@ prof_boot1(void) { } bool -prof_boot2(tsd_t *tsd) { +prof_boot2(tsd_t *tsd, base_t *base) { cassert(config_prof); if (opt_prof) { @@ -1017,9 +1020,10 @@ prof_boot2(tsd_t *tsd) { return true; } - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), - CACHELINE); + prof_base = base; + + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base, + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (gctx_locks == NULL) { return true; } @@ -1031,9 +1035,8 @@ prof_boot2(tsd_t *tsd) { } } - tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), - CACHELINE); + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base, + PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (tdata_locks == NULL) { return true; } diff --git a/src/tcache.c b/src/tcache.c index 33d3cba..782d883 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -664,14 +664,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } static bool -tcaches_create_prep(tsd_t *tsd) { +tcaches_create_prep(tsd_t *tsd, base_t *base) { bool err; malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { - tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) - * (MALLOCX_TCACHE_MAX+1), CACHELINE); + tcaches = base_alloc(tsd_tsdn(tsd), base, + sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); if (tcaches == NULL) { err = true; goto label_return; @@ -690,12 +690,12 @@ label_return: } bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) { +tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); bool err; - if (tcaches_create_prep(tsd)) { + if (tcaches_create_prep(tsd, base)) { err = true; goto label_return; } @@ -772,7 +772,7 @@ 
tcaches_destroy(tsd_t *tsd, unsigned ind) { } bool -tcache_boot(tsdn_t *tsdn) { +tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. */ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS) { @@ -789,8 +789,8 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(cache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, + nhbins * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } -- cgit v0.12 From 162c2bcf319966b83e56a552b158d87a211bfcd1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 14:13:38 -0800 Subject: Background thread: take base as a parameter. --- include/jemalloc/internal/background_thread_externs.h | 2 +- src/background_thread.c | 4 ++-- src/jemalloc.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 0f997e1..224e370 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -27,6 +27,6 @@ extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); #endif bool background_thread_boot0(void); -bool background_thread_boot1(tsdn_t *tsdn); +bool background_thread_boot1(tsdn_t *tsdn, base_t *base); #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/src/background_thread.c b/src/background_thread.c index 90b027e..ca06be0 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -895,7 +895,7 @@ background_thread_boot0(void) { } bool -background_thread_boot1(tsdn_t *tsdn) { +background_thread_boot1(tsdn_t *tsdn, base_t *base) { #ifdef JEMALLOC_BACKGROUND_THREAD assert(have_background_thread); assert(narenas_total_get() > 0); @@ -914,7 +914,7 @@ background_thread_boot1(tsdn_t *tsdn) { } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), opt_max_background_threads * + base, opt_max_background_threads * sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e1d3df..b29ae47 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1820,7 +1820,8 @@ malloc_init_hard(void) { /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). 
*/ - if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { + if (malloc_init_narenas() + || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } if (config_prof && prof_boot2(tsd, b0get())) { -- cgit v0.12 From 21dfa4300dd372c11c7e1392225f58ae92c35eeb Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 18 Feb 2020 14:39:06 -0800 Subject: Change assert_* to expect_* in tests ``` grep -Irl assert_ test/ | xargs sed -i \ 's/witness_assert/witness_do_not_replace/g'; grep -Irl assert_ test/ | xargs sed -i \ 's/malloc_mutex_assert_owner/malloc_mutex_do_not_replace_owner/g'; grep -Ir assert_ test/ | grep -o "[_a-zA-Z]*assert_[_a-zA-Z]*" | \ grep -v "^assert_"; # confirm no output grep -Irl assert_ test/ | xargs sed -i 's/assert_/expect_/g'; grep -Irl witness_do_not_replace test/ | xargs sed -i \ 's/witness_do_not_replace/witness_assert/g'; grep -Irl malloc_mutex_do_not_replace_owner test/ | xargs sed -i \ 's/malloc_mutex_do_not_replace_owner/malloc_mutex_assert_owner/g'; ``` --- test/include/test/btalloc.h | 2 +- test/include/test/extent_hooks.h | 40 ++-- test/include/test/jemalloc_test.h.in | 4 +- test/include/test/test.h | 224 ++++++++++----------- test/integration/MALLOCX_ARENA.c | 8 +- test/integration/aligned_alloc.c | 12 +- test/integration/allocated.c | 20 +- test/integration/cpp/basic.cpp | 4 +- test/integration/extent.c | 90 ++++----- test/integration/mallocx.c | 56 +++--- test/integration/overflow.c | 20 +- test/integration/posix_memalign.c | 12 +- test/integration/rallocx.c | 58 +++--- test/integration/slab_sizes.c | 22 +- test/integration/smallocx.c | 66 +++--- test/integration/thread_arena.c | 10 +- test/integration/thread_tcache_enabled.c | 38 ++-- test/integration/xallocx.c | 110 +++++----- test/unit/SFMT.c | 32 +-- test/unit/a0.c | 2 +- test/unit/arena_reset.c | 56 +++--- test/unit/atomic.c | 44 ++-- test/unit/background_thread.c | 28 +-- test/unit/background_thread_enable.c | 28 +-- test/unit/base.c | 66 +++--- test/unit/binshard.c | 28 +-- test/unit/bit_util.c | 54 ++--- test/unit/bitmap.c | 96 ++++----- test/unit/buf_writer.c | 28 +-- test/unit/cache_bin.c | 32 +-- test/unit/ckh.c | 74 +++---- test/unit/counter.c | 38 ++-- test/unit/decay.c | 142 ++++++------- test/unit/div.c | 2 +- test/unit/emitter.c | 14 +- test/unit/extent_quantize.c | 34 ++-- test/unit/fork.c | 8 +- test/unit/hash.c | 2 +- test/unit/hook.c | 336 +++++++++++++++---------------- test/unit/huge.c | 58 +++--- test/unit/inspect.c | 84 ++++---- test/unit/junk.c | 22 +- test/unit/log.c | 14 +- test/unit/mallctl.c | 322 ++++++++++++++--------------- test/unit/malloc_io.c | 18 +- test/unit/math.c | 12 +- test/unit/mq.c | 18 +- test/unit/mtx.c | 6 +- test/unit/nstime.c | 70 +++---- test/unit/pack.c | 20 +- test/unit/pages.c | 6 +- test/unit/ph.c | 58 +++--- test/unit/prng.c | 36 ++-- test/unit/prof_accum.c | 8 +- test/unit/prof_active.c | 14 +- test/unit/prof_gdump.c | 24 +-- test/unit/prof_idump.c | 12 +- test/unit/prof_log.c | 58 +++--- test/unit/prof_recent.c | 140 ++++++------- test/unit/prof_reset.c | 66 +++--- test/unit/prof_tctx.c | 16 +- test/unit/prof_thread_name.c | 14 +- test/unit/ql.c | 24 +-- test/unit/qr.c | 32 +-- test/unit/rb.c | 60 +++--- test/unit/retained.c | 24 +-- test/unit/rtree.c | 42 ++-- test/unit/safety_check.c | 14 +- test/unit/sc.c | 6 +- test/unit/seq.c | 12 +- test/unit/size_classes.c | 88 ++++---- test/unit/slab.c | 4 +- test/unit/smoothstep.c | 12 +- test/unit/stats.c | 168 ++++++++-------- test/unit/stats_print.c | 26 +-- 
test/unit/test_hooks.c | 8 +- test/unit/thread_event.c | 6 +- test/unit/ticker.c | 36 ++-- test/unit/tsd.c | 48 ++--- test/unit/witness.c | 32 +-- test/unit/zero.c | 10 +- test/unit/zero_realloc_abort.c | 4 +- test/unit/zero_realloc_free.c | 8 +- test/unit/zero_realloc_strict.c | 10 +- test/unit/zero_reallocs.c | 10 +- 85 files changed, 1860 insertions(+), 1860 deletions(-) diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 5877ea7..8f34599 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -25,6 +25,6 @@ btalloc_##n(size_t size, unsigned bits) { \ } \ } \ /* Intentionally sabotage tail call optimization. */ \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ return p; \ } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index 1f06201..aad0a46 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -86,9 +86,9 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, new_addr, size, alignment, *zero ? "true" : "false", *commit ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->alloc, extent_alloc_hook, + expect_ptr_eq(extent_hooks->alloc, extent_alloc_hook, "Wrong hook function"); called_alloc = true; if (!try_alloc) { @@ -108,9 +108,9 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, + expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { @@ -127,9 +127,9 @@ extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->destroy, extent_destroy_hook, + expect_ptr_eq(extent_hooks->destroy, extent_destroy_hook, "Wrong hook function"); called_destroy = true; if (!try_destroy) { @@ -147,9 +147,9 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->commit, extent_commit_hook, + expect_ptr_eq(extent_hooks->commit, extent_commit_hook, "Wrong hook function"); called_commit = true; if (!try_commit) { @@ -169,9 +169,9 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->decommit, extent_decommit_hook, + expect_ptr_eq(extent_hooks->decommit, extent_decommit_hook, "Wrong hook function"); called_decommit = true; if (!try_decommit) { @@ -191,9 +191,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, + expect_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, "Wrong hook function"); called_purge_lazy = true; if (!try_purge_lazy) { @@ -214,9 +214,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, + expect_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, "Wrong hook function"); called_purge_forced = true; if (!try_purge_forced) { @@ -238,9 +238,9 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr, size, size_a, size_b, committed ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->split, extent_split_hook, + expect_ptr_eq(extent_hooks->split, extent_split_hook, "Wrong hook function"); called_split = true; if (!try_split) { @@ -262,11 +262,11 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->merge, extent_merge_hook, + expect_ptr_eq(extent_hooks->merge, extent_merge_hook, "Wrong hook function"); - assert_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, + expect_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, "Extents not mergeable"); called_merge = true; if (!try_merge) { @@ -284,6 +284,6 @@ extent_hooks_prep(void) { size_t sz; sz = sizeof(default_hooks); - assert_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, NULL, 0), 0, "Unexpected mallctl() error"); } diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index a59408f..e5d6306 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -139,7 +139,7 @@ static const bool config_debug = #undef assert #undef not_reached #undef not_implemented -#undef assert_not_implemented +#undef expect_not_implemented #define assert(e) do { \ if (!(e)) { \ @@ -163,7 +163,7 @@ static const bool config_debug = abort(); \ } while (0) -#define assert_not_implemented(e) do { \ +#define expect_not_implemented(e) do { \ if (!(e)) { \ not_implemented(); \ } \ diff --git a/test/include/test/test.h b/test/include/test/test.h index 9081716..cf6616b 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ #define ASSERT_BUFSIZE 256 -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ const t a_ = (a); \ const t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -17,200 +17,200 @@ } \ } while (0) -#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ +#define expect_ptr_eq(a, b, ...) expect_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ +#define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ ==, "p", __VA_ARGS__) -#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ +#define expect_ptr_null(a, ...) expect_cmp(void *, a, NULL, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ +#define expect_ptr_not_null(a, ...) expect_cmp(void *, a, NULL, !=, \ ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) - -#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) - -#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define assert_d_ne(a, b, ...) 
assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) - -#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) - -#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ +#define expect_c_eq(a, b, ...) expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) + +#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) + +#define expect_d_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) + +#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) + +#define expect_ld_eq(a, b, ...) expect_cmp(long, a, b, ==, \ !=, "ld", __VA_ARGS__) -#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ +#define expect_ld_ne(a, b, ...) expect_cmp(long, a, b, !=, \ ==, "ld", __VA_ARGS__) -#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ +#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, \ >=, "ld", __VA_ARGS__) -#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ +#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, \ >, "ld", __VA_ARGS__) -#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ +#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, \ <, "ld", __VA_ARGS__) -#define assert_ld_gt(a, b, ...) 
assert_cmp(long, a, b, >, \ +#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, \ <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_eq(a, b, ...) expect_cmp(unsigned long, \ a, b, ==, !=, "lu", __VA_ARGS__) -#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_ne(a, b, ...) expect_cmp(unsigned long, \ a, b, !=, ==, "lu", __VA_ARGS__) -#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_lt(a, b, ...) expect_cmp(unsigned long, \ a, b, <, >=, "lu", __VA_ARGS__) -#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_le(a, b, ...) expect_cmp(unsigned long, \ a, b, <=, >, "lu", __VA_ARGS__) -#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_ge(a, b, ...) expect_cmp(unsigned long, \ a, b, >=, <, "lu", __VA_ARGS__) -#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_gt(a, b, ...) expect_cmp(unsigned long, \ a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ +#define expect_qd_eq(a, b, ...) expect_cmp(long long, a, b, ==, \ !=, "qd", __VA_ARGS__) -#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ +#define expect_qd_ne(a, b, ...) expect_cmp(long long, a, b, !=, \ ==, "qd", __VA_ARGS__) -#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ +#define expect_qd_lt(a, b, ...) expect_cmp(long long, a, b, <, \ >=, "qd", __VA_ARGS__) -#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ +#define expect_qd_le(a, b, ...) expect_cmp(long long, a, b, <=, \ >, "qd", __VA_ARGS__) -#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ +#define expect_qd_ge(a, b, ...) expect_cmp(long long, a, b, >=, \ <, "qd", __VA_ARGS__) -#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ +#define expect_qd_gt(a, b, ...) expect_cmp(long long, a, b, >, \ <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_eq(a, b, ...) expect_cmp(unsigned long long, \ a, b, ==, !=, "qu", __VA_ARGS__) -#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_ne(a, b, ...) expect_cmp(unsigned long long, \ a, b, !=, ==, "qu", __VA_ARGS__) -#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_lt(a, b, ...) expect_cmp(unsigned long long, \ a, b, <, >=, "qu", __VA_ARGS__) -#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_le(a, b, ...) expect_cmp(unsigned long long, \ a, b, <=, >, "qu", __VA_ARGS__) -#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_ge(a, b, ...) expect_cmp(unsigned long long, \ a, b, >=, <, "qu", __VA_ARGS__) -#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_gt(a, b, ...) expect_cmp(unsigned long long, \ a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ +#define expect_jd_eq(a, b, ...) expect_cmp(intmax_t, a, b, ==, \ !=, "jd", __VA_ARGS__) -#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ +#define expect_jd_ne(a, b, ...) expect_cmp(intmax_t, a, b, !=, \ ==, "jd", __VA_ARGS__) -#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ +#define expect_jd_lt(a, b, ...) expect_cmp(intmax_t, a, b, <, \ >=, "jd", __VA_ARGS__) -#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ +#define expect_jd_le(a, b, ...) 
expect_cmp(intmax_t, a, b, <=, \ >, "jd", __VA_ARGS__) -#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ +#define expect_jd_ge(a, b, ...) expect_cmp(intmax_t, a, b, >=, \ <, "jd", __VA_ARGS__) -#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ +#define expect_jd_gt(a, b, ...) expect_cmp(intmax_t, a, b, >, \ <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ +#define expect_ju_eq(a, b, ...) expect_cmp(uintmax_t, a, b, ==, \ !=, "ju", __VA_ARGS__) -#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ +#define expect_ju_ne(a, b, ...) expect_cmp(uintmax_t, a, b, !=, \ ==, "ju", __VA_ARGS__) -#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ +#define expect_ju_lt(a, b, ...) expect_cmp(uintmax_t, a, b, <, \ >=, "ju", __VA_ARGS__) -#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ +#define expect_ju_le(a, b, ...) expect_cmp(uintmax_t, a, b, <=, \ >, "ju", __VA_ARGS__) -#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ +#define expect_ju_ge(a, b, ...) expect_cmp(uintmax_t, a, b, >=, \ <, "ju", __VA_ARGS__) -#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ +#define expect_ju_gt(a, b, ...) expect_cmp(uintmax_t, a, b, >, \ <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ +#define expect_zd_eq(a, b, ...) expect_cmp(ssize_t, a, b, ==, \ !=, "zd", __VA_ARGS__) -#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ +#define expect_zd_ne(a, b, ...) expect_cmp(ssize_t, a, b, !=, \ ==, "zd", __VA_ARGS__) -#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ +#define expect_zd_lt(a, b, ...) expect_cmp(ssize_t, a, b, <, \ >=, "zd", __VA_ARGS__) -#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ +#define expect_zd_le(a, b, ...) expect_cmp(ssize_t, a, b, <=, \ >, "zd", __VA_ARGS__) -#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ +#define expect_zd_ge(a, b, ...) expect_cmp(ssize_t, a, b, >=, \ <, "zd", __VA_ARGS__) -#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ +#define expect_zd_gt(a, b, ...) expect_cmp(ssize_t, a, b, >, \ <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ +#define expect_zu_eq(a, b, ...) expect_cmp(size_t, a, b, ==, \ !=, "zu", __VA_ARGS__) -#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ +#define expect_zu_ne(a, b, ...) expect_cmp(size_t, a, b, !=, \ ==, "zu", __VA_ARGS__) -#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ +#define expect_zu_lt(a, b, ...) expect_cmp(size_t, a, b, <, \ >=, "zu", __VA_ARGS__) -#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ +#define expect_zu_le(a, b, ...) expect_cmp(size_t, a, b, <=, \ >, "zu", __VA_ARGS__) -#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ +#define expect_zu_ge(a, b, ...) expect_cmp(size_t, a, b, >=, \ <, "zu", __VA_ARGS__) -#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ +#define expect_zu_gt(a, b, ...) expect_cmp(size_t, a, b, >, \ <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ +#define expect_d32_eq(a, b, ...) expect_cmp(int32_t, a, b, ==, \ !=, FMTd32, __VA_ARGS__) -#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ +#define expect_d32_ne(a, b, ...) expect_cmp(int32_t, a, b, !=, \ ==, FMTd32, __VA_ARGS__) -#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ +#define expect_d32_lt(a, b, ...) 
expect_cmp(int32_t, a, b, <, \ >=, FMTd32, __VA_ARGS__) -#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ +#define expect_d32_le(a, b, ...) expect_cmp(int32_t, a, b, <=, \ >, FMTd32, __VA_ARGS__) -#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ +#define expect_d32_ge(a, b, ...) expect_cmp(int32_t, a, b, >=, \ <, FMTd32, __VA_ARGS__) -#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ +#define expect_d32_gt(a, b, ...) expect_cmp(int32_t, a, b, >, \ <=, FMTd32, __VA_ARGS__) -#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ +#define expect_u32_eq(a, b, ...) expect_cmp(uint32_t, a, b, ==, \ !=, FMTu32, __VA_ARGS__) -#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ +#define expect_u32_ne(a, b, ...) expect_cmp(uint32_t, a, b, !=, \ ==, FMTu32, __VA_ARGS__) -#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ +#define expect_u32_lt(a, b, ...) expect_cmp(uint32_t, a, b, <, \ >=, FMTu32, __VA_ARGS__) -#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ +#define expect_u32_le(a, b, ...) expect_cmp(uint32_t, a, b, <=, \ >, FMTu32, __VA_ARGS__) -#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ +#define expect_u32_ge(a, b, ...) expect_cmp(uint32_t, a, b, >=, \ <, FMTu32, __VA_ARGS__) -#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ +#define expect_u32_gt(a, b, ...) expect_cmp(uint32_t, a, b, >, \ <=, FMTu32, __VA_ARGS__) -#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ +#define expect_d64_eq(a, b, ...) expect_cmp(int64_t, a, b, ==, \ !=, FMTd64, __VA_ARGS__) -#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ +#define expect_d64_ne(a, b, ...) expect_cmp(int64_t, a, b, !=, \ ==, FMTd64, __VA_ARGS__) -#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ +#define expect_d64_lt(a, b, ...) expect_cmp(int64_t, a, b, <, \ >=, FMTd64, __VA_ARGS__) -#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ +#define expect_d64_le(a, b, ...) expect_cmp(int64_t, a, b, <=, \ >, FMTd64, __VA_ARGS__) -#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ +#define expect_d64_ge(a, b, ...) expect_cmp(int64_t, a, b, >=, \ <, FMTd64, __VA_ARGS__) -#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ +#define expect_d64_gt(a, b, ...) expect_cmp(int64_t, a, b, >, \ <=, FMTd64, __VA_ARGS__) -#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ +#define expect_u64_eq(a, b, ...) expect_cmp(uint64_t, a, b, ==, \ !=, FMTu64, __VA_ARGS__) -#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ +#define expect_u64_ne(a, b, ...) expect_cmp(uint64_t, a, b, !=, \ ==, FMTu64, __VA_ARGS__) -#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ +#define expect_u64_lt(a, b, ...) expect_cmp(uint64_t, a, b, <, \ >=, FMTu64, __VA_ARGS__) -#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ +#define expect_u64_le(a, b, ...) expect_cmp(uint64_t, a, b, <=, \ >, FMTu64, __VA_ARGS__) -#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ +#define expect_u64_ge(a, b, ...) expect_cmp(uint64_t, a, b, >=, \ <, FMTu64, __VA_ARGS__) -#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ +#define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) do { \ +#define expect_b_eq(a, b, ...) 
do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -226,7 +226,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_b_ne(a, b, ...) do { \ +#define expect_b_ne(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -242,10 +242,10 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) -#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) +#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) +#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, ...) do { \ +#define expect_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -258,7 +258,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_str_ne(a, b, ...) do { \ +#define expect_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -272,7 +272,7 @@ } \ } while (0) -#define assert_not_reached(...) do { \ +#define expect_not_reached(...) do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 222164d..7e61df0 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -18,7 +18,7 @@ thd_start(void *arg) { size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); if (thread_ind % 4 != 3) { @@ -29,16 +29,16 @@ thd_start(void *arg) { (sizeof(dss_precs)/sizeof(char*)); const char *dss = dss_precs[prec_ind]; int expected_err = (have_dss || prec_ind == 0) ? 
0 : EFAULT; - assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Error in mallctlnametomib()"); mib[1] = arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, sizeof(const char *)), expected_err, "Error in mallctlbymib()"); } p = mallocx(1, MALLOCX_ARENA(arena_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, 0); return NULL; diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 4375b17..3f619e7 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -9,7 +9,7 @@ */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -20,14 +20,14 @@ TEST_BEGIN(test_alignment_errors) { alignment = 0; set_errno(0); p = aligned_alloc(alignment, 1); - assert_false(p != NULL || get_errno() != EINVAL, + expect_false(p != NULL || get_errno() != EINVAL, "Expected error for invalid alignment %zu", alignment); for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { set_errno(0); p = aligned_alloc(alignment + 1, 1); - assert_false(p != NULL || get_errno() != EINVAL, + expect_false(p != NULL || get_errno() != EINVAL, "Expected error for invalid alignment %zu", alignment + 1); } @@ -58,7 +58,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() != ENOMEM, "Expected error for aligned_alloc(%zu, %zu)", alignment, size); @@ -71,7 +71,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() != ENOMEM, "Expected error for aligned_alloc(%zu, %zu)", alignment, size); @@ -83,7 +83,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() != ENOMEM, "Expected error for aligned_alloc(&p, %zu, %zu)", alignment, size); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 1425fd0..8f2f21d 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -32,7 +32,7 @@ thd_start(void *arg) { test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert_u64_eq(*ap0, a0, + expect_u64_eq(*ap0, a0, "\"thread.allocatedp\" should provide a pointer to internal " "storage"); @@ -53,25 +53,25 @@ thd_start(void *arg) { test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert_u64_eq(*dp0, d0, + expect_u64_eq(*dp0, d0, "\"thread.deallocatedp\" should provide a pointer to internal " "storage"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() error"); + expect_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); sz = sizeof(ap1); mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); - assert_u64_eq(*ap1, a1, + expect_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); - assert_ptr_eq(ap0, ap1, + expect_ptr_eq(ap0, ap1, "Pointer returned by \"thread.allocatedp\" should not change"); usize = 
malloc_usable_size(p); - assert_u64_le(a0 + usize, a1, + expect_u64_le(a0 + usize, a1, "Allocated memory counter should increase by at least the amount " "explicitly allocated"); @@ -81,19 +81,19 @@ thd_start(void *arg) { mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); sz = sizeof(dp1); mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); - assert_u64_eq(*dp1, d1, + expect_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value should equal " "\"thread.deallocated\" value"); - assert_ptr_eq(dp0, dp1, + expect_ptr_eq(dp0, dp1, "Pointer returned by \"thread.deallocatedp\" should not change"); - assert_u64_le(d0 + usize, d1, + expect_u64_le(d0 + usize, d1, "Deallocated memory counter should increase by at least the amount " "explicitly deallocated"); return NULL; label_ENOENT: - assert_false(config_stats, + expect_false(config_stats, "ENOENT should only be returned if stats are disabled"); test_skip("\"thread.allocated\" mallctl not available"); return NULL; diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index 65890ec..b48ec8a 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -3,14 +3,14 @@ TEST_BEGIN(test_basic) { auto foo = new long(4); - assert_ptr_not_null(foo, "Unexpected new[] failure"); + expect_ptr_not_null(foo, "Unexpected new[] failure"); delete foo; // Test nullptr handling. foo = nullptr; delete foo; auto bar = new long; - assert_ptr_not_null(bar, "Unexpected new failure"); + expect_ptr_not_null(bar, "Unexpected new failure"); delete bar; // Test nullptr handling. bar = nullptr; diff --git a/test/integration/extent.c b/test/integration/extent.c index a75ba03..ccc314d 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -10,7 +10,7 @@ check_background_thread_enabled(void) { if (ret == ENOENT) { return false; } - assert_d_eq(ret, 0, "Unexpected mallctl error"); + expect_d_eq(ret, 0, "Unexpected mallctl error"); return enabled; } @@ -27,16 +27,16 @@ test_extent_body(unsigned arena_ind) { /* Get large size classes. */ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected arenas.lextent.0.size failure"); - assert_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, 0), 0, "Unexpected arenas.lextent.1.size failure"); - assert_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, 0), 0, "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. 
*/ purge_miblen = sizeof(purge_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + expect_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; called_alloc = false; @@ -44,22 +44,22 @@ test_extent_body(unsigned arena_ind) { try_dalloc = false; try_decommit = false; p = mallocx(large0 * 2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_true(called_alloc, "Expected alloc call"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); + expect_true(called_alloc, "Expected alloc call"); called_dalloc = false; called_decommit = false; did_purge_lazy = false; did_purge_forced = false; called_split = false; xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + expect_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { - assert_true(called_dalloc, "Expected dalloc call"); - assert_true(called_decommit, "Expected decommit call"); - assert_true(did_purge_lazy || did_purge_forced, + expect_true(called_dalloc, "Expected dalloc call"); + expect_true(called_decommit, "Expected decommit call"); + expect_true(did_purge_lazy || did_purge_forced, "Expected purge"); - assert_true(called_split, "Expected split call"); + expect_true(called_split, "Expected split call"); } dallocx(p, flags); try_dalloc = true; @@ -68,25 +68,25 @@ test_extent_body(unsigned arena_ind) { try_dalloc = false; try_decommit = true; p = mallocx(large0 * 2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; called_split = false; did_split = false; did_merge = false; xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + expect_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) { - assert_true(did_split, "Expected split"); + expect_true(did_split, "Expected split"); } xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); if (did_split) { - assert_b_eq(did_decommit, did_commit, + expect_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); } if (xallocx_success_b && xallocx_success_c) { - assert_true(did_merge, "Expected merge"); + expect_true(did_merge, "Expected merge"); } dallocx(p, flags); try_dalloc = true; @@ -94,7 +94,7 @@ test_extent_body(unsigned arena_ind) { /* Make sure non-large allocation succeeds. */ p = mallocx(42, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, flags); } @@ -110,7 +110,7 @@ test_manual_hook_auto_arena(void) { sz = sizeof(unsigned); /* Get number of auto arenas. */ - assert_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); if (narenas == 1) { return; @@ -118,18 +118,18 @@ test_manual_hook_auto_arena(void) { /* Install custom extent hooks on arena 1 (might not be initialized). 
*/ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = 1; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, (void *)&new_hooks, new_size), 0, "Unexpected extent_hooks error"); static bool auto_arena_created = false; if (old_hooks != &hooks) { - assert_b_eq(auto_arena_created, false, + expect_b_eq(auto_arena_created, false, "Expected auto arena 1 created only once."); auto_arena_created = true; } @@ -146,35 +146,35 @@ test_manual_hook_body(void) { extent_hooks_prep(); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); /* Install custom extent hooks. */ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, (void *)&new_hooks, new_size), 0, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->alloc, extent_alloc_hook, + expect_ptr_ne(old_hooks->alloc, extent_alloc_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, + expect_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->commit, extent_commit_hook, + expect_ptr_ne(old_hooks->commit, extent_commit_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->decommit, extent_decommit_hook, + expect_ptr_ne(old_hooks->decommit, extent_decommit_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook, + expect_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook, + expect_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->split, extent_split_hook, + expect_ptr_ne(old_hooks->split, extent_split_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->merge, extent_merge_hook, + expect_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); if (!check_background_thread_enabled()) { @@ -182,26 +182,26 @@ test_manual_hook_body(void) { } /* Restore extent hooks. 
*/ - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->alloc, default_hooks->alloc, + expect_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); + expect_ptr_eq(old_hooks->alloc, default_hooks->alloc, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, + expect_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->commit, default_hooks->commit, + expect_ptr_eq(old_hooks->commit, default_hooks->commit, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->decommit, default_hooks->decommit, + expect_ptr_eq(old_hooks->decommit, default_hooks->decommit, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy, + expect_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced, + expect_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->split, default_hooks->split, + expect_ptr_eq(old_hooks->split, default_hooks->split, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->merge, default_hooks->merge, + expect_ptr_eq(old_hooks->merge, default_hooks->merge, "Unexpected extent_hooks error"); } @@ -232,7 +232,7 @@ TEST_BEGIN(test_extent_auto_hook) { sz = sizeof(unsigned); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); test_skip_if(check_background_thread_enabled()); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 645d4db..fdf1e3f 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -6,7 +6,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -25,11 +25,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -47,7 +47,7 @@ get_large_size(size_t ind) { */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -66,16 +66,16 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); - assert_ptr_null(mallocx(largemax+1, 0), + expect_ptr_null(mallocx(largemax+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", largemax+1); - assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + expect_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(mallocx(SIZE_T_MAX, 0), + expect_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); } @@ -85,11 +85,11 @@ static void * remote_alloc(void *arg) { unsigned arena; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), 0, "Unexpected mallctl failure"); void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena) @@ -105,7 +105,7 @@ TEST_BEGIN(test_remote_free) { void *ret; thd_create(&thd, remote_alloc, (void *)&ret); thd_join(thd, NULL); - assert_ptr_not_null(ret, "Unexpected mallocx failure"); + expect_ptr_not_null(ret, "Unexpected mallocx failure"); /* Avoid TCACHE_NONE to explicitly test tcache_flush(). 
*/ dallocx(ret, 0); @@ -131,7 +131,7 @@ TEST_BEGIN(test_oom) { oom = true; } } - assert_true(oom, + expect_true(oom, "Expected OOM during series of calls to mallocx(size=%zu, 0)", largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { @@ -142,14 +142,14 @@ TEST_BEGIN(test_oom) { purge(); #if LG_SIZEOF_PTR == 3 - assert_ptr_null(mallocx(0x8000000000000000ULL, + expect_ptr_null(mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x8000000000000000ULL)), "Expected OOM for mallocx()"); - assert_ptr_null(mallocx(0x8000000000000000ULL, + expect_ptr_null(mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)), "Expected OOM for mallocx()"); #else - assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), + expect_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), "Expected OOM for mallocx()"); #endif } @@ -166,28 +166,28 @@ TEST_BEGIN(test_basic) { size_t nsz, rsz; void *p; nsz = nallocx(sz, 0); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); - assert_zu_ge(rsz, sz, "Real size smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + expect_zu_ge(rsz, sz, "Real size smaller than expected"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", nsz); rsz = sallocx(p, 0); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); purge(); } @@ -224,22 +224,22 @@ TEST_BEGIN(test_alignment_and_size) { for (i = 0; i < NITER; i++) { nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO | MALLOCX_ARENA(0)); - assert_zu_ne(nsz, 0, + expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO | MALLOCX_ARENA(0)); - assert_ptr_not_null(ps[i], + expect_ptr_not_null(ps[i], "mallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); rsz = sallocx(ps[i], 0); - assert_zu_ge(rsz, sz, + expect_zu_ge(rsz, sz, "Real size smaller than expected for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, rsz, + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)ps[i] & (alignment-1)), "%p inadequately aligned for" " alignment=%zu, size=%zu", ps[i], diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 748ebb6..ce63327 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -17,33 +17,33 @@ TEST_BEGIN(test_overflow) { void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + 
expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); mib[2] = nlextents - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, "Unexpected mallctlbymib() error"); - assert_ptr_null(malloc(max_size_class + 1), + expect_ptr_null(malloc(max_size_class + 1), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(malloc(SIZE_T_MAX), + expect_ptr_null(malloc(SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(calloc(1, max_size_class + 1), + expect_ptr_null(calloc(1, max_size_class + 1), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(calloc(1, SIZE_T_MAX), + expect_ptr_null(calloc(1, SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() OOM"); - assert_ptr_null(realloc(p, max_size_class + 1), + expect_ptr_not_null(p, "Unexpected malloc() OOM"); + expect_ptr_null(realloc(p, max_size_class + 1), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(realloc(p, SIZE_T_MAX), + expect_ptr_null(realloc(p, SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); free(p); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index d992260..6f8a1b0 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -9,7 +9,7 @@ */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -18,14 +18,14 @@ TEST_BEGIN(test_alignment_errors) { void *p; for (alignment = 0; alignment < sizeof(void *); alignment++) { - assert_d_eq(posix_memalign(&p, alignment, 1), EINVAL, + expect_d_eq(posix_memalign(&p, alignment, 1), EINVAL, "Expected error for invalid alignment %zu", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - assert_d_ne(posix_memalign(&p, alignment + 1, 1), 0, + expect_d_ne(posix_memalign(&p, alignment + 1, 1), 0, "Expected error for invalid alignment %zu", alignment + 1); } @@ -43,7 +43,7 @@ TEST_BEGIN(test_oom_errors) { alignment = 0x80000000LU; size = 0x80000000LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); @@ -54,7 +54,7 @@ TEST_BEGIN(test_oom_errors) { alignment = 0x40000000LU; size = 0xc0000001LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); @@ -64,7 +64,7 @@ TEST_BEGIN(test_oom_errors) { #else size = 0xfffffff0LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 08ed08d..6cc4437 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -6,7 +6,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) 
failure", cmd); return ret; @@ -25,11 +25,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; @@ -50,28 +50,28 @@ TEST_BEGIN(test_grow_and_shrink) { #define MAXSZ ZU(12 * 1024 * 1024) p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); szs[0] = sallocx(p, 0); for (i = 0; i < NCYCLES; i++) { for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { q = rallocx(p, szs[j-1]+1, 0); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", szs[j-1], szs[j-1]+1); szs[j] = sallocx(q, 0); - assert_zu_ne(szs[j], szs[j-1]+1, + expect_zu_ne(szs[j], szs[j-1]+1, "Expected size to be at least: %zu", szs[j-1]+1); p = q; } for (j--; j > 0; j--) { q = rallocx(p, szs[j-1], 0); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", szs[j], szs[j-1]); tsz = sallocx(q, 0); - assert_zu_eq(tsz, szs[j-1], + expect_zu_eq(tsz, szs[j-1], "Expected size=%zu, got size=%zu", szs[j-1], tsz); p = q; } @@ -113,23 +113,23 @@ TEST_BEGIN(test_zero) { for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { size_t start_size = start_sizes[i]; p = mallocx(start_size, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); psz = sallocx(p, 0); - assert_false(validate_fill(p, 0, 0, psz), + expect_false(validate_fill(p, 0, 0, psz), "Expected zeroed memory"); memset(p, FILL_BYTE, psz); - assert_false(validate_fill(p, FILL_BYTE, 0, psz), + expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); for (j = 1; j < RANGE; j++) { q = rallocx(p, start_size+j, MALLOCX_ZERO); - assert_ptr_not_null(q, "Unexpected rallocx() error"); + expect_ptr_not_null(q, "Unexpected rallocx() error"); qsz = sallocx(q, 0); if (q != p || qsz != psz) { - assert_false(validate_fill(q, FILL_BYTE, 0, + expect_false(validate_fill(q, FILL_BYTE, 0, psz), "Expected filled memory"); - assert_false(validate_fill(q, 0, psz, qsz-psz), + expect_false(validate_fill(q, 0, psz, qsz-psz), "Expected zeroed memory"); } if (psz != qsz) { @@ -139,7 +139,7 @@ TEST_BEGIN(test_zero) { } p = q; } - assert_false(validate_fill(p, FILL_BYTE, 0, psz), + expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); dallocx(p, 0); } @@ -154,13 +154,13 @@ TEST_BEGIN(test_align) { align = ZU(1); p = mallocx(1, MALLOCX_ALIGN(align)); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); for (align <<= 1; align <= MAX_ALIGN; align <<= 1) { q = rallocx(p, 1, MALLOCX_ALIGN(align)); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for align=%zu", align); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)q & (align-1)), "%p inadequately aligned for align=%zu", q, align); @@ -180,23 +180,23 @@ TEST_BEGIN(test_lg_align_and_zero) { lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() 
error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for lg_align=%u", lg_align); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { - assert_false(validate_fill(q, 0, 0, sz), + expect_false(validate_fill(q, 0, 0, sz), "Expected zeroed memory"); } else { - assert_false(validate_fill(q, 0, 0, MAX_VALIDATE), + expect_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); - assert_false(validate_fill( + expect_false(validate_fill( (void *)((uintptr_t)q+sz-MAX_VALIDATE), 0, 0, MAX_VALIDATE), "Expected zeroed memory"); } @@ -225,18 +225,18 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_ptr_null(rallocx(p, largemax+1, 0), + expect_ptr_null(rallocx(p, largemax+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1); - assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + expect_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c index af250c3..f6a66f2 100644 --- a/test/integration/slab_sizes.c +++ b/test/integration/slab_sizes.c @@ -10,19 +10,19 @@ TEST_BEGIN(test_slab_sizes) { size_t len; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, "nbins mallctl failure"); len = sizeof(page); - assert_d_eq(mallctl("arenas.page", &page, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.page", &page, &len, NULL, 0), 0, "page mallctl failure"); len = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", sizemib, &len), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", sizemib, &len), 0, "bin size mallctlnametomib failure"); len = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", slabmib, &len), + expect_d_eq(mallctlnametomib("arenas.bin.0.slab_size", slabmib, &len), 0, "slab size mallctlnametomib failure"); size_t biggest_slab_seen = 0; @@ -33,11 +33,11 @@ TEST_BEGIN(test_slab_sizes) { len = sizeof(size_t); sizemib[2] = i; slabmib[2] = i; - assert_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, + expect_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, NULL, 0), 0, "bin size mallctlbymib failure"); len = sizeof(size_t); - assert_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, + expect_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, NULL, 0), 0, "slab size mallctlbymib failure"); if (bin_size < 100) { @@ -48,19 +48,19 @@ TEST_BEGIN(test_slab_sizes) { * should at least make sure that the number of pages * goes up. 
*/ - assert_zu_ge(slab_size, biggest_slab_seen, + expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; } else if ( (100 <= bin_size && bin_size < 128) || (128 < bin_size && bin_size <= 200)) { - assert_zu_eq(slab_size, page, + expect_zu_eq(slab_size, page, "Forced-small slabs should be small"); } else if (bin_size == 128) { - assert_zu_eq(slab_size, 2 * page, + expect_zu_eq(slab_size, 2 * page, "Forced-2-page slab should be 2 pages"); } else if (200 < bin_size && bin_size <= 4096) { - assert_zu_ge(slab_size, biggest_slab_seen, + expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; } @@ -69,7 +69,7 @@ TEST_BEGIN(test_slab_sizes) { * For any reasonable configuration, 17 pages should be a valid slab * size for 4096-byte items. */ - assert_zu_eq(biggest_slab_seen, 17 * page, "Didn't hit page target"); + expect_zu_eq(biggest_slab_seen, 17 * page, "Didn't hit page target"); } TEST_END diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index 2486752..389319b 100644 --- a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -26,7 +26,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -45,11 +45,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -67,7 +67,7 @@ get_large_size(size_t ind) { */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -86,16 +86,16 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); - assert_ptr_null(smallocx(largemax+1, 0).ptr, + expect_ptr_null(smallocx(largemax+1, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", largemax+1); - assert_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, + expect_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, + expect_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, + expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); } @@ -105,17 +105,17 @@ static void * remote_alloc(void *arg) { unsigned arena; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), 0, "Unexpected mallctl failure"); smallocx_return_t r = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void *ptr = r.ptr; - assert_zu_eq(r.size, + expect_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE), "Expected smalloc(size,flags).size == nallocx(size,flags)"); void **ret = (void **)arg; @@ -129,7 +129,7 @@ TEST_BEGIN(test_remote_free) { void *ret; thd_create(&thd, remote_alloc, (void *)&ret); thd_join(thd, NULL); - assert_ptr_not_null(ret, "Unexpected smallocx failure"); + expect_ptr_not_null(ret, "Unexpected smallocx failure"); /* Avoid TCACHE_NONE to explicitly test tcache_flush(). 
*/ dallocx(ret, 0); @@ -155,7 +155,7 @@ TEST_BEGIN(test_oom) { oom = true; } } - assert_true(oom, + expect_true(oom, "Expected OOM during series of calls to smallocx(size=%zu, 0)", largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { @@ -166,14 +166,14 @@ TEST_BEGIN(test_oom) { purge(); #if LG_SIZEOF_PTR == 3 - assert_ptr_null(smallocx(0x8000000000000000ULL, + expect_ptr_null(smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x8000000000000000ULL)).ptr, "Expected OOM for smallocx()"); - assert_ptr_null(smallocx(0x8000000000000000ULL, + expect_ptr_null(smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)).ptr, "Expected OOM for smallocx()"); #else - assert_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, + expect_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, "Expected OOM for smallocx()"); #endif } @@ -191,36 +191,36 @@ TEST_BEGIN(test_basic) { size_t nsz, rsz, smz; void *p; nsz = nallocx(sz, 0); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); - assert_zu_ge(rsz, sz, "Real size smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + expect_zu_ge(rsz, sz, "Real size smaller than expected"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - assert_zu_ne(smz, 0, "Unexpected smallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(smz, 0, "Unexpected smallocx() error"); ret = smallocx(sz, MALLOCX_ZERO); p = ret.ptr; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=MALLOCX_ZERO) error", nsz); rsz = sallocx(p, 0); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); purge(); } @@ -257,27 +257,27 @@ TEST_BEGIN(test_alignment_and_size) { for (i = 0; i < NITER; i++) { nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); - assert_zu_ne(nsz, 0, + expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); smallocx_return_t ret = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); ps[i] = ret.ptr; - assert_ptr_not_null(ps[i], + expect_ptr_not_null(ps[i], "smallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); rsz = sallocx(ps[i], 0); smz = ret.size; - assert_zu_ge(rsz, sz, + expect_zu_ge(rsz, sz, "Real size smaller than expected for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, rsz, + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, smz, + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)ps[i] & (alignment-1)), "%p 
inadequately aligned for" " alignment=%zu, size=%zu", ps[i], diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 1e5ec05..4a6abf6 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -11,7 +11,7 @@ thd_start(void *arg) { int err; p = malloc(1); - assert_ptr_not_null(p, "Error in malloc()"); + expect_ptr_not_null(p, "Error in malloc()"); free(p); size = sizeof(arena_ind); @@ -31,7 +31,7 @@ thd_start(void *arg) { buferror(err, buf, sizeof(buf)); test_fail("Error in mallctl(): %s", buf); } - assert_u_eq(arena_ind, main_arena_ind, + expect_u_eq(arena_ind, main_arena_ind, "Arena index should be same as for main thread"); return NULL; @@ -52,11 +52,11 @@ TEST_BEGIN(test_thread_arena) { unsigned i; p = malloc(1); - assert_ptr_not_null(p, "Error in malloc()"); + expect_ptr_not_null(p, "Error in malloc()"); unsigned arena_ind, old_arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); size_t size = sizeof(arena_ind); @@ -73,7 +73,7 @@ TEST_BEGIN(test_thread_arena) { for (i = 0; i < NTHREADS; i++) { intptr_t join_ret; thd_join(thds[i], (void *)&join_ret); - assert_zd_eq(join_ret, 0, "Unexpected thread join error"); + expect_zd_eq(join_ret, 0, "Unexpected thread join error"); } free(p); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 95c9acc..d44dbe9 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -4,59 +4,59 @@ void * thd_start(void *arg) { bool e0, e1; size_t sz = sizeof(bool); - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, 
"Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); return NULL; diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index cd0ca04..1370854 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -11,7 +11,7 @@ arena_ind(void) { if (ind == 0) { size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, + expect_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0, "Unexpected mallctl failure creating arena"); } @@ -23,11 +23,11 @@ TEST_BEGIN(test_same_size) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz, 0, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -38,11 +38,11 @@ TEST_BEGIN(test_extra_no_move) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz, sz-42, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -53,11 +53,11 @@ TEST_BEGIN(test_no_move_fail) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz + 5, 0, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -69,7 +69,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -93,11 +93,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -122,20 +122,20 @@ TEST_BEGIN(test_size) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test smallest supported size. */ - assert_zu_eq(xallocx(p, 1, 0, 0), small0, + expect_zu_eq(xallocx(p, 1, 0, 0), small0, "Unexpected xallocx() behavior"); /* Test largest supported size. */ - assert_zu_le(xallocx(p, largemax, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax, 0, 0), largemax, "Unexpected xallocx() behavior"); /* Test size overflow. */ - assert_zu_le(xallocx(p, largemax+1, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax+1, 0, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -151,22 +151,22 @@ TEST_BEGIN(test_size_extra_overflow) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test overflows that can be resolved by clamping extra. */ - assert_zu_le(xallocx(p, largemax-1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax-1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax, 1, 0), largemax, + expect_zu_le(xallocx(p, largemax, 1, 0), largemax, "Unexpected xallocx() behavior"); /* Test overflow such that largemax-size underflows. */ - assert_zu_le(xallocx(p, largemax+1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax+1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax+2, 3, 0), largemax, + expect_zu_le(xallocx(p, largemax+2, 3, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -183,21 +183,21 @@ TEST_BEGIN(test_extra_small) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, small1, 0, 0), small0, + expect_zu_eq(xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small1, 0, 0), small0, + expect_zu_eq(xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, + expect_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0, + expect_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, + expect_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -217,56 +217,56 @@ TEST_BEGIN(test_extra_large) { largemax = get_large_size(get_nlarge()-1); p = mallocx(large3, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, large3, 0, flags), large3, + expect_zu_eq(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, 0, flags), large1, + expect_zu_ge(xallocx(p, smallmax, 0, flags), large1, "Unexpected xallocx() behavior"); if (xallocx(p, large3, 0, flags) != large3) { p = rallocx(p, large3, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } /* Test size decrease with non-zero extra. */ - assert_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, + expect_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, large3 - large2, flags), large3, + expect_zu_eq(xallocx(p, large2, large3 - large2, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, large2 - large1, flags), large2, + expect_zu_ge(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, + expect_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, large3, 0, flags), large3, + expect_zu_le(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax+1, 0, flags), large3, + expect_zu_le(xallocx(p, largemax+1, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax, + expect_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, large1, large3 - large1, flags), large3, + expect_zu_le(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); if (xallocx(p, large3, 0, flags) != large3) { p = rallocx(p, large3, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } /* Test size+extra overflow. 
*/ - assert_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, + expect_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, "Unexpected xallocx() behavior"); dallocx(p, flags); @@ -320,8 +320,8 @@ test_zero(size_t szmin, size_t szmax) { sz = szmax; p = mallocx(sz, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", + expect_ptr_not_null(p, "Unexpected mallocx() error"); + expect_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); /* @@ -329,30 +329,30 @@ test_zero(size_t szmin, size_t szmax) { * errors. */ memset(p, FILL_BYTE, sz); - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); /* Shrink in place so that we can expect growing in place to succeed. */ sz = szmin; if (xallocx(p, sz, 0, flags) != sz) { p = rallocx(p, sz, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { nsz = nallocx(sz+1, flags); if (xallocx(p, sz+1, 0, flags) != nsz) { p = rallocx(p, sz+1, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); - assert_false(validate_fill(p, 0x00, sz, nsz-sz), + expect_false(validate_fill(p, 0x00, sz, nsz-sz), "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz); memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz); - assert_false(validate_fill(p, FILL_BYTE, 0, nsz), + expect_false(validate_fill(p, FILL_BYTE, 0, nsz), "Memory not filled: nsz=%zu", nsz); } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index 1fc8cf1..b9f85dd 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1456,7 +1456,7 @@ TEST_BEGIN(test_gen_rand_32) { uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), BLOCK_SIZE, + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_gen_rand(1234); fill_array32(ctx, array32, BLOCK_SIZE); @@ -1466,16 +1466,16 @@ TEST_BEGIN(test_gen_rand_32) { ctx = init_gen_rand(1234); for (i = 0; i < BLOCK_SIZE; i++) { if (i < COUNT_1) { - assert_u32_eq(array32[i], init_gen_rand_32_expected[i], + expect_u32_eq(array32[i], init_gen_rand_32_expected[i], "Output mismatch for i=%d", i); } r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32[i], + expect_u32_eq(r32, array32[i], "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); } for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32_2[i], + expect_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], r32); } @@ -1491,7 +1491,7 @@ TEST_BEGIN(test_by_array_32) { uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), BLOCK_SIZE, + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_by_array(ini, 4); fill_array32(ctx, array32, BLOCK_SIZE); @@ -1501,16 +1501,16 @@ TEST_BEGIN(test_by_array_32) { ctx = init_by_array(ini, 4); for (i = 0; i < BLOCK_SIZE; i++) { if (i < COUNT_1) { - assert_u32_eq(array32[i], init_by_array_32_expected[i], + expect_u32_eq(array32[i], init_by_array_32_expected[i], "Output mismatch for i=%d", i); } r32 = 
gen_rand32(ctx); - assert_u32_eq(r32, array32[i], + expect_u32_eq(r32, array32[i], "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); } for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32_2[i], + expect_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], r32); } @@ -1525,7 +1525,7 @@ TEST_BEGIN(test_gen_rand_64) { uint64_t r; sfmt_t *ctx; - assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + expect_d_le(get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_gen_rand(4321); fill_array64(ctx, array64, BLOCK_SIZE64); @@ -1535,17 +1535,17 @@ TEST_BEGIN(test_gen_rand_64) { ctx = init_gen_rand(4321); for (i = 0; i < BLOCK_SIZE64; i++) { if (i < COUNT_1) { - assert_u64_eq(array64[i], init_gen_rand_64_expected[i], + expect_u64_eq(array64[i], init_gen_rand_64_expected[i], "Output mismatch for i=%d", i); } r = gen_rand64(ctx); - assert_u64_eq(r, array64[i], + expect_u64_eq(r, array64[i], "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); - assert_u64_eq(r, array64_2[i], + expect_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i, array64_2[i], r); } @@ -1561,7 +1561,7 @@ TEST_BEGIN(test_by_array_64) { uint32_t ini[] = {5, 4, 3, 2, 1}; sfmt_t *ctx; - assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + expect_d_le(get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_by_array(ini, 5); fill_array64(ctx, array64, BLOCK_SIZE64); @@ -1571,17 +1571,17 @@ TEST_BEGIN(test_by_array_64) { ctx = init_by_array(ini, 5); for (i = 0; i < BLOCK_SIZE64; i++) { if (i < COUNT_1) { - assert_u64_eq(array64[i], init_by_array_64_expected[i], + expect_u64_eq(array64[i], init_by_array_64_expected[i], "Output mismatch for i=%d", i); } r = gen_rand64(ctx); - assert_u64_eq(r, array64[i], + expect_u64_eq(r, array64[i], "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); - assert_u64_eq(r, array64_2[i], + expect_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i, array64_2[i], r); } diff --git a/test/unit/a0.c b/test/unit/a0.c index a27ab3f..c1be79a 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_a0) { void *p; p = a0malloc(1); - assert_ptr_not_null(p, "Unexpected a0malloc() error"); + expect_ptr_not_null(p, "Unexpected a0malloc() error"); a0dalloc(p); } TEST_END diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 64db058..a7a23f7 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -13,7 +13,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -37,11 +37,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -85,7 +85,7 @@ static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); return arena_ind; @@ -105,19 +105,19 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { nlarge = get_nlarge() > NLARGE ? NLARGE : get_nlarge(); *nptrs = nsmall + nlarge; *ptrs = (void **)malloc(*nptrs * sizeof(void *)); - assert_ptr_not_null(*ptrs, "Unexpected malloc() failure"); + expect_ptr_not_null(*ptrs, "Unexpected malloc() failure"); /* Allocate objects with a wide range of sizes. */ for (i = 0; i < nsmall; i++) { sz = get_small_size(i); (*ptrs)[i] = mallocx(sz, flags); - assert_ptr_not_null((*ptrs)[i], + expect_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nlarge; i++) { sz = get_large_size(i); (*ptrs)[nsmall + i] = mallocx(sz, flags); - assert_ptr_not_null((*ptrs)[i], + expect_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } @@ -125,7 +125,7 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { /* Verify allocations. */ for (i = 0; i < *nptrs; i++) { - assert_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0, + expect_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0, "Allocation should have queryable size"); } } @@ -143,7 +143,7 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { } /* Verify allocations no longer exist. */ for (i = 0; i < nptrs; i++) { - assert_zu_eq(vsalloc(tsdn, ptrs[i]), 0, + expect_zu_eq(vsalloc(tsdn, ptrs[i]), 0, "Allocation should no longer exist"); } if (have_background_thread) { @@ -160,10 +160,10 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib(name, mib, &miblen), 0, + expect_d_eq(mallctlnametomib(name, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -197,23 +197,23 @@ arena_i_initialized(unsigned arena_ind, bool refresh) { if (refresh) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return initialized; } TEST_BEGIN(test_arena_destroy_initial) { - assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should not be initialized"); } TEST_END @@ -226,9 +226,9 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_false(arena_i_initialized(arena_ind, false), + 
expect_false(arena_i_initialized(arena_ind, false), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(arena_ind, true), + expect_true(arena_i_initialized(arena_ind, true), "Arena stats should be initialized"); /* @@ -239,9 +239,9 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { do_arena_destroy(arena_ind); - assert_false(arena_i_initialized(arena_ind, true), + expect_false(arena_i_initialized(arena_ind, true), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); do_arena_reset_post(ptrs, nptrs, arena_ind); @@ -249,7 +249,7 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind_prev = arena_ind; arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_u_eq(arena_ind, arena_ind_prev, + expect_u_eq(arena_ind, arena_ind_prev, "Arena index should have been recycled"); do_arena_destroy(arena_ind); do_arena_reset_post(ptrs, nptrs, arena_ind); @@ -268,9 +268,9 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, + expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { @@ -314,20 +314,20 @@ TEST_BEGIN(test_arena_destroy_hooks_unmap) { arena_ind = do_arena_create(&hooks); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_true(did_alloc, "Expected alloc"); + expect_true(did_alloc, "Expected alloc"); - assert_false(arena_i_initialized(arena_ind, false), + expect_false(arena_i_initialized(arena_ind, false), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(arena_ind, true), + expect_true(arena_i_initialized(arena_ind, true), "Arena stats should be initialized"); did_dalloc = false; do_arena_destroy(arena_ind); - assert_true(did_dalloc, "Expected dalloc"); + expect_true(did_dalloc, "Expected dalloc"); - assert_false(arena_i_initialized(arena_ind, true), + expect_false(arena_i_initialized(arena_ind, true), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); do_arena_reset_post(ptrs, nptrs, arena_ind); diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 572d8d2..1326a11 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -6,7 +6,7 @@ * some places and "ptr" in others. In the long run it would be nice to unify * these, but in the short run we'll use this shim. */ -#define assert_p_eq assert_ptr_eq +#define expect_p_eq expect_ptr_eq /* * t: the non-atomic type, like "uint32_t". @@ -24,20 +24,20 @@ \ /* ATOMIC_INIT and load. */ \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, "Load or init failed"); \ + expect_##ta##_eq(val1, val, "Load or init failed"); \ \ /* Store. 
*/ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val2, val, "Store failed"); \ + expect_##ta##_eq(val2, val, "Store failed"); \ \ /* Exchange. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \ + expect_##ta##_eq(val1, val, "Exchange returned invalid value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val2, val, "Exchange store invalid value"); \ + expect_##ta##_eq(val2, val, "Exchange store invalid value"); \ \ /* \ * Weak CAS. Spurious failures are allowed, so we loop a few \ @@ -49,17 +49,17 @@ expected = val2; \ success = atomic_compare_exchange_weak_##ta(&atom, \ &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, expected, \ + expect_##ta##_eq(val1, expected, \ "CAS should update expected"); \ } \ - assert_b_eq(val1 == val2, success, \ + expect_b_eq(val1 == val2, success, \ "Weak CAS did the wrong state update"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ if (success) { \ - assert_##ta##_eq(val3, val, \ + expect_##ta##_eq(val3, val, \ "Successful CAS should update atomic"); \ } else { \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Unsuccessful CAS should not update atomic"); \ } \ \ @@ -68,14 +68,14 @@ expected = val2; \ success = atomic_compare_exchange_strong_##ta(&atom, &expected, \ val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ - assert_b_eq(val1 == val2, success, \ + expect_b_eq(val1 == val2, success, \ "Strong CAS did the wrong state update"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ if (success) { \ - assert_##ta##_eq(val3, val, \ + expect_##ta##_eq(val3, val, \ "Successful CAS should update atomic"); \ } else { \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Unsuccessful CAS should not update atomic"); \ } \ \ @@ -89,46 +89,46 @@ /* Fetch-add. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-add should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 + val2, val, \ + expect_##ta##_eq(val1 + val2, val, \ "Fetch-add should update atomic"); \ \ /* Fetch-sub. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-sub should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 - val2, val, \ + expect_##ta##_eq(val1 - val2, val, \ "Fetch-sub should update atomic"); \ \ /* Fetch-and. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-and should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 & val2, val, \ + expect_##ta##_eq(val1 & val2, val, \ "Fetch-and should update atomic"); \ \ /* Fetch-or. 
*/ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-or should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 | val2, val, \ + expect_##ta##_eq(val1 | val2, val, \ "Fetch-or should update atomic"); \ \ /* Fetch-xor. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-xor should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 ^ val2, val, \ + expect_##ta##_eq(val1 ^ val2, val, \ "Fetch-xor should update atomic"); \ } while (0) diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index f597285..c60010a 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -8,15 +8,15 @@ test_switch_background_thread_ctl(bool new_val) { size_t sz = sizeof(bool); e1 = new_val; - assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, !e1, + expect_b_eq(e0, !e1, "background_thread should be %d before.\n", !e1); if (e1) { - assert_zu_gt(n_background_threads, 0, + expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); } else { - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be zero.\n"); } } @@ -27,15 +27,15 @@ test_repeat_background_thread_ctl(bool before) { size_t sz = sizeof(bool); e1 = before; - assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, before, + expect_b_eq(e0, before, "background_thread should be %d.\n", before); if (e1) { - assert_zu_gt(n_background_threads, 0, + expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); } else { - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be zero.\n"); } } @@ -46,16 +46,16 @@ TEST_BEGIN(test_background_thread_ctl) { bool e0, e1; size_t sz = sizeof(bool); - assert_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("background_thread", (void *)&e1, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e1, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, e1, + expect_b_eq(e0, e1, "Default and opt.background_thread does not match.\n"); if (e0) { test_switch_background_thread_ctl(false); } - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be 0.\n"); for (unsigned i = 0; i < 4; i++) { @@ -80,7 +80,7 @@ TEST_BEGIN(test_background_thread_running) { test_repeat_background_thread_ctl(false); test_switch_background_thread_ctl(true); - assert_b_eq(info->state, background_thread_started, + expect_b_eq(info->state, background_thread_started, "Background_thread did not start.\n"); nstime_t start; @@ -100,7 +100,7 @@ TEST_BEGIN(test_background_thread_running) { nstime_t now; nstime_init_update(&now); nstime_subtract(&now, &start); - assert_u64_lt(nstime_sec(&now), 1000, + 
expect_u64_lt(nstime_sec(&now), 1000, "Background threads did not run for 1000 seconds."); sleep(1); } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index d894e93..46776f3 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -16,16 +16,16 @@ TEST_BEGIN(test_deferred) { * approximation. */ for (unsigned i = 0; i < 10 * ncpus; i++) { - assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } bool enable = true; size_t sz_b = sizeof(bool); - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); enable = false; - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to disable background threads"); } TEST_END @@ -36,43 +36,43 @@ TEST_BEGIN(test_max_background_threads) { size_t max_n_thds; size_t opt_max_n_thds; size_t sz_m = sizeof(max_n_thds); - assert_d_eq(mallctl("opt.max_background_threads", + expect_d_eq(mallctl("opt.max_background_threads", &opt_max_n_thds, &sz_m, NULL, 0), 0, "Failed to get opt.max_background_threads"); - assert_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, + expect_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, 0), 0, "Failed to get max background threads"); - assert_zu_eq(opt_max_n_thds, max_n_thds, + expect_zu_eq(opt_max_n_thds, max_n_thds, "max_background_threads and " "opt.max_background_threads should match"); - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, "Failed to set max background threads"); unsigned id; size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < 10 * ncpus; i++) { - assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } bool enable = true; size_t sz_b = sizeof(bool); - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); - assert_zu_eq(n_background_threads, max_n_thds, + expect_zu_eq(n_background_threads, max_n_thds, "Number of background threads should not change.\n"); size_t new_max_thds = max_n_thds - 1; if (new_max_thds > 0) { - assert_d_eq(mallctl("max_background_threads", NULL, NULL, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); - assert_zu_eq(n_background_threads, new_max_thds, + expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should decrease by 1.\n"); } new_max_thds = 1; - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); - assert_zu_eq(n_background_threads, new_max_thds, + expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } TEST_END diff --git a/test/unit/base.c b/test/unit/base.c index 3b848ca..5e990b3 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -37,21 +37,21 @@ TEST_BEGIN(test_base_hooks_default) { if (config_stats) { base_stats_get(tsdn, 
base, &allocated0, &resident, &mapped, &n_thp); - assert_zu_ge(allocated0, sizeof(base_t), + expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - assert_zu_gt(n_thp, 0, + expect_zu_gt(n_thp, 0, "Base should have 1 THP at least."); } } - assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &resident, &mapped, &n_thp); - assert_zu_ge(allocated1 - allocated0, 42, + expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -75,26 +75,26 @@ TEST_BEGIN(test_base_hooks_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, &hooks); - assert_ptr_not_null(base, "Unexpected base_new() failure"); + expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, &n_thp); - assert_zu_ge(allocated0, sizeof(base_t), + expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - assert_zu_gt(n_thp, 0, + expect_zu_gt(n_thp, 0, "Base should have 1 THP at least."); } } - assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &resident, &mapped, &n_thp); - assert_zu_ge(allocated1 - allocated0, 42, + expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -121,8 +121,8 @@ TEST_BEGIN(test_base_hooks_not_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); did_alloc = false; base = base_new(tsdn, 0, &hooks); - assert_ptr_not_null(base, "Unexpected base_new() failure"); - assert_true(did_alloc, "Expected alloc"); + expect_ptr_not_null(base, "Unexpected base_new() failure"); + expect_true(did_alloc, "Expected alloc"); /* * Check for tight packing at specified alignment under simple @@ -143,21 +143,21 @@ TEST_BEGIN(test_base_hooks_not_null) { size_t align_ceil = ALIGNMENT_CEILING(alignment, QUANTUM); p = base_alloc(tsdn, base, 1, alignment); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); - assert_ptr_eq(p, + expect_ptr_eq(p, (void *)(ALIGNMENT_CEILING((uintptr_t)p, alignment)), "Expected quantum alignment"); q = base_alloc(tsdn, base, alignment, alignment); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected base_alloc() failure"); - assert_ptr_eq((void *)((uintptr_t)p + align_ceil), q, + expect_ptr_eq((void *)((uintptr_t)p + align_ceil), q, "Minimal allocation should take up %zu bytes", align_ceil); r = base_alloc(tsdn, base, 1, alignment); - assert_ptr_not_null(r, + expect_ptr_not_null(r, "Unexpected base_alloc() failure"); - assert_ptr_eq((void *)((uintptr_t)q + align_ceil), r, + expect_ptr_eq((void *)((uintptr_t)q + align_ceil), r, "Minimal allocation should take up %zu bytes", align_ceil); } @@ -168,23 +168,23 @@ TEST_BEGIN(test_base_hooks_not_null) { * that the first block's remaining space is considered for subsequent * allocation. */ - assert_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, + expect_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, "Remainder insufficient for test"); /* Use up all but one quantum of block. 
*/ while (edata_bsize_get(&base->blocks->edata) > QUANTUM) { p = base_alloc(tsdn, base, QUANTUM, QUANTUM); - assert_ptr_not_null(p, "Unexpected base_alloc() failure"); + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); } r_exp = edata_addr_get(&base->blocks->edata); - assert_zu_eq(base->extent_sn_next, 1, "One extant block expected"); + expect_zu_eq(base->extent_sn_next, 1, "One extant block expected"); q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM); - assert_ptr_not_null(q, "Unexpected base_alloc() failure"); - assert_ptr_ne(q, r_exp, "Expected allocation from new block"); - assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + expect_ptr_not_null(q, "Unexpected base_alloc() failure"); + expect_ptr_ne(q, r_exp, "Expected allocation from new block"); + expect_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); r = base_alloc(tsdn, base, QUANTUM, QUANTUM); - assert_ptr_not_null(r, "Unexpected base_alloc() failure"); - assert_ptr_eq(r, r_exp, "Expected allocation from first block"); - assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + expect_ptr_not_null(r, "Unexpected base_alloc() failure"); + expect_ptr_eq(r, r_exp, "Expected allocation from first block"); + expect_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); /* * Check for proper alignment support when normal blocks are too small. @@ -199,9 +199,9 @@ TEST_BEGIN(test_base_hooks_not_null) { for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; p = base_alloc(tsdn, base, QUANTUM, alignment); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); - assert_ptr_eq(p, + expect_ptr_eq(p, (void *)(ALIGNMENT_CEILING((uintptr_t)p, alignment)), "Expected %zu-byte alignment", alignment); @@ -211,11 +211,11 @@ TEST_BEGIN(test_base_hooks_not_null) { called_dalloc = called_destroy = called_decommit = called_purge_lazy = called_purge_forced = false; base_delete(tsdn, base); - assert_true(called_dalloc, "Expected dalloc call"); - assert_true(!called_destroy, "Unexpected destroy call"); - assert_true(called_decommit, "Expected decommit call"); - assert_true(called_purge_lazy, "Expected purge_lazy call"); - assert_true(called_purge_forced, "Expected purge_forced call"); + expect_true(called_dalloc, "Expected dalloc call"); + expect_true(!called_destroy, "Unexpected destroy call"); + expect_true(called_decommit, "Expected decommit call"); + expect_true(called_purge_lazy, "Expected purge_lazy call"); + expect_true(called_purge_forced, "Expected purge_forced call"); try_dalloc = true; try_destroy = true; diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 6e10d47..243a9b3 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -13,7 +13,7 @@ thd_producer(void *varg) { sz = sizeof(arena); /* Remote arena. */ - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); for (i = 0; i < REMOTE_NALLOC / 2; i++) { mem[i] = mallocx(1, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena)); @@ -42,7 +42,7 @@ TEST_BEGIN(test_producer_consumer) { /* Remote deallocation by the current thread. 
*/ for (i = 0; i < NTHREADS; i++) { for (unsigned j = 0; j < REMOTE_NALLOC; j++) { - assert_ptr_not_null(mem[i][j], + expect_ptr_not_null(mem[i][j], "Unexpected remote allocation failure"); dallocx(mem[i][j], 0); } @@ -65,12 +65,12 @@ thd_start(void *varg) { edata = emap_edata_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); - assert_u_lt(shard1, 16, "Unexpected bin shard used"); + expect_u_lt(shard1, 16, "Unexpected bin shard used"); edata = emap_edata_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); - assert_u_lt(shard2, 4, "Unexpected bin shard used"); + expect_u_lt(shard2, 4, "Unexpected bin shard used"); if (shard1 > 0 || shard2 > 0) { /* Triggered sharded bin usage. */ @@ -98,7 +98,7 @@ TEST_BEGIN(test_bin_shard_mt) { sharded = true; } } - assert_b_eq(sharded, true, "Did not find sharded bins"); + expect_b_eq(sharded, true, "Did not find sharded bins"); } TEST_END @@ -108,14 +108,14 @@ TEST_BEGIN(test_bin_shard) { size_t miblen, miblen2, len; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.nshards", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nshards", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); miblen2 = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib2, &miblen2), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib2, &miblen2), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < nbins; i++) { @@ -124,22 +124,22 @@ TEST_BEGIN(test_bin_shard) { mib[2] = i; sz1 = sizeof(nshards); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib2[2] = i; sz2 = sizeof(size); - assert_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, + expect_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, NULL, 0), 0, "Unexpected mallctlbymib() failure"); if (size >= 1 && size <= 128) { - assert_u_eq(nshards, 16, "Unexpected nshards"); + expect_u_eq(nshards, 16, "Unexpected nshards"); } else if (size == 256) { - assert_u_eq(nshards, 8, "Unexpected nshards"); + expect_u_eq(nshards, 8, "Unexpected nshards"); } else if (size > 128 && size <= 512) { - assert_u_eq(nshards, 4, "Unexpected nshards"); + expect_u_eq(nshards, 4, "Unexpected nshards"); } else { - assert_u_eq(nshards, 1, "Unexpected nshards"); + expect_u_eq(nshards, 1, "Unexpected nshards"); } } } diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index b747deb..3eeb7a3 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -6,27 +6,27 @@ unsigned i, pow2; \ t x; \ \ - assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ \ for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ << i, "Unexpected result"); \ } \ \ for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ ((t)1) << i, "Unexpected result"); \ } \ \ for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ ((t)1) << (i+1), 
"Unexpected result"); \ } \ \ for (pow2 = 1; pow2 < 25; pow2++) { \ for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ x++) { \ - assert_##suf##_eq(pow2_ceil_##suf(x), \ + expect_##suf##_eq(pow2_ceil_##suf(x), \ ((t)1) << pow2, \ "Unexpected result, x=%"pri, x); \ } \ @@ -49,35 +49,35 @@ TEST_BEGIN(test_pow2_ceil_zu) { TEST_END void -assert_lg_ceil_range(size_t input, unsigned answer) { +expect_lg_ceil_range(size_t input, unsigned answer) { if (input == 1) { - assert_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); + expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); return; } - assert_zu_le(input, (ZU(1) << answer), + expect_zu_le(input, (ZU(1) << answer), "Got %u as lg_ceil of %zu", answer, input); - assert_zu_gt(input, (ZU(1) << (answer - 1)), + expect_zu_gt(input, (ZU(1) << (answer - 1)), "Got %u as lg_ceil of %zu", answer, input); } void -assert_lg_floor_range(size_t input, unsigned answer) { +expect_lg_floor_range(size_t input, unsigned answer) { if (input == 1) { - assert_u_eq(0, answer, "Got %u as lg_floor of 1", answer); + expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); return; } - assert_zu_ge(input, (ZU(1) << answer), + expect_zu_ge(input, (ZU(1) << answer), "Got %u as lg_floor of %zu", answer, input); - assert_zu_lt(input, (ZU(1) << (answer + 1)), + expect_zu_lt(input, (ZU(1) << (answer + 1)), "Got %u as lg_floor of %zu", answer, input); } TEST_BEGIN(test_lg_ceil_floor) { for (size_t i = 1; i < 10 * 1000 * 1000; i++) { - assert_lg_ceil_range(i, lg_ceil(i)); - assert_lg_ceil_range(i, LG_CEIL(i)); - assert_lg_floor_range(i, lg_floor(i)); - assert_lg_floor_range(i, LG_FLOOR(i)); + expect_lg_ceil_range(i, lg_ceil(i)); + expect_lg_ceil_range(i, LG_CEIL(i)); + expect_lg_floor_range(i, lg_floor(i)); + expect_lg_floor_range(i, LG_FLOOR(i)); } for (int i = 10; i < 8 * (1 << LG_SIZEOF_PTR) - 5; i++) { for (size_t j = 0; j < (1 << 4); j++) { @@ -85,17 +85,17 @@ TEST_BEGIN(test_lg_ceil_floor) { - j * ((size_t)1 << (i - 4)); size_t num2 = ((size_t)1 << i) + j * ((size_t)1 << (i - 4)); - assert_zu_ne(num1, 0, "Invalid lg argument"); - assert_zu_ne(num2, 0, "Invalid lg argument"); - assert_lg_ceil_range(num1, lg_ceil(num1)); - assert_lg_ceil_range(num1, LG_CEIL(num1)); - assert_lg_ceil_range(num2, lg_ceil(num2)); - assert_lg_ceil_range(num2, LG_CEIL(num2)); + expect_zu_ne(num1, 0, "Invalid lg argument"); + expect_zu_ne(num2, 0, "Invalid lg argument"); + expect_lg_ceil_range(num1, lg_ceil(num1)); + expect_lg_ceil_range(num1, LG_CEIL(num1)); + expect_lg_ceil_range(num2, lg_ceil(num2)); + expect_lg_ceil_range(num2, LG_CEIL(num2)); - assert_lg_floor_range(num1, lg_floor(num1)); - assert_lg_floor_range(num1, LG_FLOOR(num1)); - assert_lg_floor_range(num2, lg_floor(num2)); - assert_lg_floor_range(num2, LG_FLOOR(num2)); + expect_lg_floor_range(num1, lg_floor(num1)); + expect_lg_floor_range(num1, LG_FLOOR(num1)); + expect_lg_floor_range(num2, lg_floor(num2)); + expect_lg_floor_range(num2, LG_FLOOR(num2)); } } } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 182f2f6..6b0ea9e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -97,28 +97,28 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_info_t binfo_dyn; bitmap_info_init(&binfo_dyn, nbits); - assert_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), + expect_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); - assert_zu_eq(binfo->nbits, binfo_dyn.nbits, + expect_zu_eq(binfo->nbits, 
binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); #ifdef BITMAP_USE_TREE - assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, + expect_u_eq(binfo->nlevels, binfo_dyn.nlevels, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); { unsigned i; for (i = 0; i < binfo->nlevels; i++) { - assert_zu_eq(binfo->levels[i].group_offset, + expect_zu_eq(binfo->levels[i].group_offset, binfo_dyn.levels[i].group_offset, "Unexpected difference between static and dynamic " "initialization, nbits=%zu, level=%u", nbits, i); } } #else - assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, + expect_zu_eq(binfo->ngroups, binfo_dyn.ngroups, "Unexpected difference between static and dynamic initialization"); #endif } @@ -140,9 +140,9 @@ static size_t test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, size_t prev_size) { size_t size = bitmap_size(binfo); - assert_zu_ge(size, (nbits >> 3), + expect_zu_ge(size, (nbits >> 3), "Bitmap size is smaller than expected"); - assert_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); + expect_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); return size; } @@ -170,17 +170,17 @@ static void test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { - assert_false(bitmap_get(bitmap, binfo, i), + expect_false(bitmap_get(bitmap, binfo, i), "Bit should be unset"); } bitmap_init(bitmap, binfo, true); for (i = 0; i < nbits; i++) { - assert_true(bitmap_get(bitmap, binfo, i), "Bit should be set"); + expect_true(bitmap_get(bitmap, binfo, i), "Bit should be set"); } free(bitmap); @@ -207,13 +207,13 @@ static void test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } @@ -238,20 +238,20 @@ static void test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); for (i = 0; i < nbits; i++) { bitmap_unset(bitmap, binfo, i); } for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } @@ -275,25 +275,25 @@ TEST_END static void test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); 
bitmap_init(bitmap, binfo, false); /* Iteratively set bits starting at the beginning. */ for (size_t i = 0; i < nbits; i++) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after previous first unset " "bit"); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); /* * Iteratively unset bits starting at the end, and verify that @@ -301,17 +301,17 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { */ for (size_t i = nbits - 1; i < nbits; i--) { /* (nbits..0] */ bitmap_unset(bitmap, binfo, i); - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should the bit previously unset"); bitmap_unset(bitmap, binfo, i); } - assert_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset"); + expect_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset"); /* * Iteratively set bits starting at the beginning, and verify that @@ -319,29 +319,29 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { */ for (size_t i = 1; i < nbits; i++) { bitmap_set(bitmap, binfo, i - 1); - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after the bit previously " "set"); bitmap_unset(bitmap, binfo, i); } - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1), + expect_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? 
nbits-2 : nbits-1), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "First unset bit should be the last bit"); - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); /* * Bubble a "usu" pattern through the bitmap and verify that @@ -352,22 +352,22 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_unset(bitmap, binfo, i); bitmap_unset(bitmap, binfo, i+2); if (i > 0) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, "Unexpected first unset bit"); if (i + 3 < nbits) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+3), + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+3), nbits, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i+2, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i+2, "Unexpected first unset bit"); } } @@ -382,20 +382,20 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { for (size_t i = 0; i < nbits-1; i++) { bitmap_unset(bitmap, binfo, i); if (i > 0) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), nbits-1, "Unexpected first unset bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, "Unexpected first unset bit"); } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 5171d61..37314db 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -14,7 +14,7 @@ static void test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - assert_zu_le(prev_test_write_len, test_write_len, + expect_zu_le(prev_test_write_len, test_write_len, "Test write overflowed"); } @@ -22,7 +22,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - assert_ptr_not_null(buf_writer->buf, "Buffer is null"); + expect_ptr_not_null(buf_writer->buf, "Buffer is null"); write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); void *cbopaque = 
buf_writer_get_cbopaque(buf_writer); @@ -41,7 +41,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. */ - assert_u64_eq(arg_store, arg, "Call " + expect_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -49,12 +49,12 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += buf_writer->buf_size; } } - assert_zu_eq(test_write_len + remain, i * unit, + expect_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } buf_writer_flush(buf_writer); - assert_zu_eq(test_write_len, n_unit * unit, + expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } @@ -65,7 +65,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); @@ -75,7 +75,7 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } @@ -84,13 +84,13 @@ TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); - assert_ptr_null(buf_writer.buf, "Buffer should be null"); + expect_ptr_null(buf_writer.buf, "Buffer should be null"); write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); - assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + expect_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - assert_ptr_eq(cbopaque, &arg, "Should use arg"); + expect_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -107,14 +107,14 @@ TEST_BEGIN(test_buf_write_oom) { for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); write_cb(cbopaque, s); - assert_u64_eq(arg_store, arg, + expect_u64_eq(arg_store, arg, "Call back argument didn't get through"); - assert_zu_eq(test_write_len, i * unit, + expect_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } buf_writer_flush(&buf_writer); - assert_zu_eq(test_write_len, n_unit * unit, + expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 12201a2..f98a92c 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -8,52 +8,52 @@ TEST_BEGIN(test_cache_bin) { /* Page aligned to make sure lowbits not overflowable. 
*/ void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); - assert_ptr_not_null(stack, "Unexpected mallocx failure"); + expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, + expect_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, "Incorrect empty position"); - /* Not using assert_zu etc on cache_bin_sz_t since it may change. */ - assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + /* Not using expect_zu etc on cache_bin_sz_t since it may change. */ + expect_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); bool success; void *ret = cache_bin_alloc_easy(bin, &success, 0); - assert_false(success, "Empty cache bin should not alloc"); - assert_true(cache_bin_low_water_get(bin, 0) == 0, + expect_false(success, "Empty cache bin should not alloc"); + expect_true(cache_bin_low_water_get(bin, 0) == 0, "Incorrect low water mark"); cache_bin_ncached_set(bin, 0, 0); - assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); + expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - assert_true(success && cache_bin_ncached_get(bin, 0) == i, + expect_true(success && cache_bin_ncached_get(bin, 0) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); - assert_false(success, "Bin should be full"); - assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); + expect_false(success, "Bin should be full"); + expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); cache_bin_ncached_set(bin, 0, ncached_max); - assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); + expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { ret = cache_bin_alloc_easy(bin, &success, 0); cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); - assert_true(success && ncached == i - 1, + expect_true(success && ncached == i - 1, "Cache bin alloc failure"); - assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - assert_true(cache_bin_low_water_get(bin, 0) == ncached, + expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); + expect_true(cache_bin_low_water_get(bin, 0) == ncached, "Incorrect low water mark"); } ret = cache_bin_alloc_easy(bin, &success, 0); - assert_false(success, "Empty cache bin should not alloc."); - assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, + expect_false(success, "Empty cache bin should not alloc."); + expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); } TEST_END diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 707ea5f..36142ac 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -6,11 +6,11 @@ TEST_BEGIN(test_new_delete) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, + expect_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); } @@ -30,16 +30,16 @@ TEST_BEGIN(test_count_insert_search_remove) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - assert_zu_eq(ckh_count(&ckh), 0, + expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { ckh_insert(tsd, &ckh, strs[i], strs[i]); - assert_zu_eq(ckh_count(&ckh), i+1, + expect_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); } @@ -57,17 +57,17 @@ TEST_BEGIN(test_count_insert_search_remove) { vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_search(&ckh, strs[i], kp, vp), + expect_false(ckh_search(&ckh, strs[i], kp, vp), "Unexpected ckh_search() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? strs[i] : (const char *)NULL; - assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", + expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); - assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", + expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); } - assert_true(ckh_search(&ckh, missing, NULL, NULL), + expect_true(ckh_search(&ckh, missing, NULL, NULL), "Unexpected ckh_search() success"); /* Remove. */ @@ -83,16 +83,16 @@ TEST_BEGIN(test_count_insert_search_remove) { vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), + expect_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? 
strs[i] : (const char *)NULL; - assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", + expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); - assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", + expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); - assert_zu_eq(ckh_count(&ckh), + expect_zu_eq(ckh_count(&ckh), sizeof(strs)/sizeof(const char *) - i - 1, "ckh_count() should return %zu, but it returned %zu", sizeof(strs)/sizeof(const char *) - i - 1, @@ -113,40 +113,40 @@ TEST_BEGIN(test_insert_iter_remove) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { p[i] = mallocx(i+1, 0); - assert_ptr_not_null(p[i], "Unexpected mallocx() failure"); + expect_ptr_not_null(p[i], "Unexpected mallocx() failure"); } for (i = 0; i < NITEMS; i++) { size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), + expect_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); - assert_false(ckh_search(&ckh, p[j], &q, &r), + expect_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); - assert_ptr_eq(p[j], q, "Key pointer mismatch"); - assert_ptr_eq(p[j], r, "Value pointer mismatch"); + expect_ptr_eq(p[j], q, "Key pointer mismatch"); + expect_ptr_eq(p[j], r, "Value pointer mismatch"); } - assert_zu_eq(ckh_count(&ckh), NITEMS, + expect_zu_eq(ckh_count(&ckh), NITEMS, "ckh_count() should return %zu, but it returned %zu", NITEMS, ckh_count(&ckh)); for (j = i + 1; j < NITEMS; j++) { - assert_false(ckh_search(&ckh, p[j], NULL, NULL), + expect_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), + expect_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); - assert_ptr_eq(p[j], q, "Key pointer mismatch"); - assert_ptr_eq(p[j], r, "Value pointer mismatch"); - assert_true(ckh_search(&ckh, p[j], NULL, NULL), + expect_ptr_eq(p[j], q, "Key pointer mismatch"); + expect_ptr_eq(p[j], r, "Value pointer mismatch"); + expect_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), + expect_true(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -159,11 +159,11 @@ TEST_BEGIN(test_insert_iter_remove) { for (tabind = 0; !ckh_iter(&ckh, &tabind, &q, &r);) { size_t k; - assert_ptr_eq(q, r, "Key and val not equal"); + expect_ptr_eq(q, r, "Key and val not equal"); for (k = 0; k < NITEMS; k++) { if (p[k] == q) { - assert_false(seen[k], + expect_false(seen[k], "Item %zu already seen", k); seen[k] = true; break; @@ -172,29 +172,29 @@ TEST_BEGIN(test_insert_iter_remove) { } for (j = 0; j < i + 1; j++) { - assert_true(seen[j], "Item %zu not seen", j); + expect_true(seen[j], "Item %zu not seen", j); } for (; j < NITEMS; j++) { - assert_false(seen[j], "Item %zu seen", j); + expect_false(seen[j], "Item %zu seen", j); } } } for (i = 0; i < NITEMS; i++) { - assert_false(ckh_search(&ckh, p[i], NULL, NULL), + expect_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), + expect_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); - assert_ptr_eq(p[i], q, "Key pointer mismatch"); - assert_ptr_eq(p[i], r, "Value pointer mismatch"); - assert_true(ckh_search(&ckh, p[i], NULL, 
NULL), + expect_ptr_eq(p[i], q, "Key pointer mismatch"); + expect_ptr_eq(p[i], r, "Value pointer mismatch"); + expect_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), + expect_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } - assert_zu_eq(ckh_count(&ckh), 0, + expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); ckh_delete(tsd, &ckh); diff --git a/test/unit/counter.c b/test/unit/counter.c index 619510d..585cbc6 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -16,24 +16,24 @@ TEST_BEGIN(test_counter_accum) { trigger = counter_accum(tsd_tsdn(tsd), &c, increment); accum += increment; if (accum < interval) { - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); } else { - assert_b_eq(trigger, true, "Should have triggered"); + expect_b_eq(trigger, true, "Should have triggered"); } } - assert_b_eq(trigger, true, "Should have triggered"); + expect_b_eq(trigger, true, "Should have triggered"); } TEST_END void -assert_counter_value(counter_accum_t *c, uint64_t v) { +expect_counter_value(counter_accum_t *c, uint64_t v) { uint64_t accum; #ifdef JEMALLOC_ATOMIC_U64 accum = atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); #else accum = c->accumbytes; #endif - assert_u64_eq(accum, v, "Counter value mismatch"); + expect_u64_eq(accum, v, "Counter value mismatch"); } TEST_BEGIN(test_counter_rollback) { @@ -47,34 +47,34 @@ TEST_BEGIN(test_counter_rollback) { bool trigger; trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); - assert_counter_value(&c, 0); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); - assert_counter_value(&c, 1); + expect_counter_value(&c, 1); counter_rollback(tsd_tsdn(tsd), &c, 1); - assert_counter_value(&c, 0); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, 1); - assert_counter_value(&c, half_interval - 1); + expect_counter_value(&c, half_interval - 1); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); - assert_counter_value(&c, interval - 1); + expect_b_eq(trigger, false, "Should not trigger"); + expect_counter_value(&c, interval - 1); trigger = counter_accum(tsd_tsdn(tsd), &c, 1); - assert_b_eq(trigger, true, "Should have triggered"); - assert_counter_value(&c, 0); + expect_b_eq(trigger, true, "Should have triggered"); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); - assert_b_eq(trigger, true, "Should have triggered"); - assert_counter_value(&c, 1); + expect_b_eq(trigger, true, "Should have triggered"); + expect_counter_value(&c, 1); } TEST_END @@ -114,7 +114,7 @@ TEST_BEGIN(test_counter_mt) { thd_join(thds[i], &ret); sum += (uintptr_t)ret; } - assert_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), + expect_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), 
"Incorrect number of triggers"); } TEST_END diff --git a/test/unit/decay.c b/test/unit/decay.c index 59936db..7ed270f 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -17,7 +17,7 @@ check_background_thread_enabled(void) { if (ret == ENOENT) { return false; } - assert_d_eq(ret, 0, "Unexpected mallctl error"); + expect_d_eq(ret, 0, "Unexpected mallctl error"); return enabled; } @@ -39,22 +39,22 @@ static unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, "Unexpected mallctlbymib() failure"); - assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, "Unexpected mallctlbymib() failure"); @@ -65,17 +65,17 @@ static void do_arena_destroy(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } void do_epoch(void) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } @@ -83,10 +83,10 @@ void do_purge(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -94,10 +94,10 @@ void do_decay(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -105,12 +105,12 @@ static uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + 
expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; uint64_t npurge = 0; size_t sz = sizeof(npurge); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); return npurge; } @@ -145,12 +145,12 @@ get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; size_t pdirty; size_t sz = sizeof(pdirty); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return pdirty; } @@ -160,12 +160,12 @@ get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; size_t pmuzzy; size_t sz = sizeof(pmuzzy); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return pmuzzy; } @@ -173,7 +173,7 @@ get_arena_pmuzzy(unsigned arena_ind) { static void * do_mallocx(size_t size, int flags) { void *p = mallocx(size, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); return p; } @@ -193,7 +193,7 @@ TEST_BEGIN(test_decay_ticks) { void *p; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); /* Set up a manually managed arena for test. */ @@ -202,11 +202,11 @@ TEST_BEGIN(test_decay_ticks) { /* Migrate to the new arena, and get the ticker. */ unsigned old_arena_ind; size_t sz_arena_ind = sizeof(old_arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); - assert_ptr_not_null(decay_ticker, + expect_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); /* @@ -218,38 +218,38 @@ TEST_BEGIN(test_decay_ticks) { /* malloc(). */ tick0 = ticker_read(decay_ticker); p = malloc(large0); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); /* free(). */ tick0 = ticker_read(decay_ticker); free(p); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); /* calloc(). 
*/ tick0 = ticker_read(decay_ticker); p = calloc(1, large0); - assert_ptr_not_null(p, "Unexpected calloc() failure"); + expect_ptr_not_null(p, "Unexpected calloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); free(p); /* posix_memalign(). */ tick0 = ticker_read(decay_ticker); - assert_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, + expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). */ tick0 = ticker_read(decay_ticker); p = aligned_alloc(sizeof(size_t), large0); - assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); @@ -257,20 +257,20 @@ TEST_BEGIN(test_decay_ticks) { /* Allocate. */ tick0 = ticker_read(decay_ticker); p = realloc(NULL, large0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); + expect_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Reallocate. */ tick0 = ticker_read(decay_ticker); p = realloc(p, large0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); + expect_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Deallocate. */ tick0 = ticker_read(decay_ticker); realloc(p, 0); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* * Test the *allocx() APIs using large and small size classes, with @@ -288,40 +288,40 @@ TEST_BEGIN(test_decay_ticks) { /* mallocx(). */ tick0 = ticker_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during mallocx() (sz=%zu)", sz); /* rallocx(). */ tick0 = ticker_read(decay_ticker); p = rallocx(p, sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during rallocx() (sz=%zu)", sz); /* xallocx(). */ tick0 = ticker_read(decay_ticker); xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during xallocx() (sz=%zu)", sz); /* dallocx(). */ tick0 = ticker_read(decay_ticker); dallocx(p, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during dallocx() (sz=%zu)", sz); /* sdallocx(). 
*/ p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick0 = ticker_read(decay_ticker); sdallocx(p, sz, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " "(sz=%zu)", sz); } @@ -338,11 +338,11 @@ TEST_BEGIN(test_decay_ticks) { size_t tcache_max, sz_tcache_max; sz_tcache_max = sizeof(tcache_max); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { @@ -351,26 +351,26 @@ TEST_BEGIN(test_decay_ticks) { /* tcache fill. */ tick0 = ticker_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " "(sz=%zu)", sz); /* tcache flush. */ dallocx(p, MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); tick1 = ticker_read(decay_ticker); /* Will only tick if it's in tcache. */ if (sz <= tcache_max) { - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache " "flush (sz=%zu)", sz); } else { - assert_u32_eq(tick1, tick0, + expect_u32_eq(tick1, tick0, "Unexpected ticker tick during tcache " "flush (sz=%zu)", sz); } @@ -417,7 +417,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, dallocx(p, flags); if (config_stats) { - assert_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + + expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + muzzy_npurge0, "Expected purging to occur"); } #undef NINTERVALS @@ -442,7 +442,7 @@ TEST_BEGIN(test_decay_ticker) { size_t tcache_max; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, 0), 0, "Unexpected mallctl failure"); large = nallocx(tcache_max + 1, flags); @@ -467,7 +467,7 @@ TEST_BEGIN(test_decay_ticker) { dallocx(ps[i], flags); unsigned nupdates0 = nupdates_mock; do_decay(arena_ind); - assert_u_gt(nupdates_mock, nupdates0, + expect_u_gt(nupdates_mock, nupdates0, "Expected nstime_update() to be called"); } @@ -495,10 +495,10 @@ TEST_BEGIN(test_decay_nonmonotonic) { unsigned i, nupdates0; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); do_epoch(); sz = sizeof(uint64_t); @@ -515,15 +515,15 @@ TEST_BEGIN(test_decay_nonmonotonic) { for (i = 0; i < NPS; i++) { ps[i] = mallocx(large0, flags); - 
assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } for (i = 0; i < NPS; i++) { dallocx(ps[i], flags); nupdates0 = nupdates_mock; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); - assert_u_gt(nupdates_mock, nupdates0, + expect_u_gt(nupdates_mock, nupdates0, "Expected nstime_update() to be called"); } @@ -532,7 +532,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { npurge1 = get_arena_npurge(0); if (config_stats) { - assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + expect_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); } nstime_monotonic = nstime_monotonic_orig; @@ -545,16 +545,16 @@ TEST_BEGIN(test_decay_now) { test_skip_if(check_background_thread_enabled()); unsigned arena_ind = do_arena_create(0, 0); - assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; /* Verify that dirty/muzzy pages never linger after deallocation. */ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { size_t size = sizes[i]; generate_dirty(arena_ind, size); - assert_zu_eq(get_arena_pdirty(arena_ind), 0, + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); } do_arena_destroy(arena_ind); @@ -566,8 +566,8 @@ TEST_BEGIN(test_decay_never) { unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; void *ptrs[sizeof(sizes)/sizeof(size_t)]; for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { @@ -576,15 +576,15 @@ TEST_BEGIN(test_decay_never) { /* Verify that each deallocation generates additional dirty pages. 
*/ size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); - assert_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); - assert_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); + expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); size_t pmuzzy = get_arena_pmuzzy(arena_ind); - assert_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), + expect_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), pdirty_prev, "Expected dirty pages to increase."); - assert_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); + expect_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); pdirty_prev = pdirty; } do_arena_destroy(arena_ind); diff --git a/test/unit/div.c b/test/unit/div.c index b47f10b..29aea66 100644 --- a/test/unit/div.c +++ b/test/unit/div.c @@ -14,7 +14,7 @@ TEST_BEGIN(test_div_exhaustive) { dividend += divisor) { size_t quotient = div_compute( &div_info, dividend); - assert_zu_eq(dividend, quotient * divisor, + expect_zu_eq(dividend, quotient * divisor, "With divisor = %zu, dividend = %zu, " "got quotient %zu", divisor, dividend, quotient); } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index 712c9e1..ef8f9ff 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -58,14 +58,14 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { size_t written = malloc_snprintf(buf_descriptor->buf, buf_descriptor->len, "%s", str); - assert_zu_eq(written, strlen(str), "Buffer overflow!"); + expect_zu_eq(written, strlen(str), "Buffer overflow!"); buf_descriptor->buf += written; buf_descriptor->len -= written; - assert_zu_gt(buf_descriptor->len, 0, "Buffer out of space!"); + expect_zu_gt(buf_descriptor->len, 0, "Buffer out of space!"); } static void -assert_emit_output(void (*emit_fn)(emitter_t *), +expect_emit_output(void (*emit_fn)(emitter_t *), const char *expected_json_output, const char *expected_json_compact_output, const char *expected_table_output) { @@ -80,7 +80,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_json_output, buf, "json output failure"); + expect_str_eq(expected_json_output, buf, "json output failure"); buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; @@ -89,7 +89,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_json_compact_output, buf, + expect_str_eq(expected_json_compact_output, buf, "compact json output failure"); buf_descriptor.buf = buf; @@ -99,7 +99,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_table_output, buf, "table output failure"); + expect_str_eq(expected_table_output, buf, "table output failure"); } static void @@ -505,7 +505,7 @@ static const char *table_row_table = #define GENERATE_TEST(feature) \ TEST_BEGIN(test_##feature) { \ - assert_emit_output(emit_##feature, feature##_json, \ + expect_emit_output(emit_##feature, feature##_json, \ feature##_json_compact, feature##_table); \ } \ TEST_END diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 64b3baa..27a4a7e 100644 --- 
a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -12,21 +12,21 @@ TEST_BEGIN(test_small_extent_size) { */ sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); for (i = 0; i < nbins; i++) { mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - assert_zu_eq(extent_size, + expect_zu_eq(extent_size, sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); - assert_zu_eq(extent_size, + expect_zu_eq(extent_size, sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); @@ -47,42 +47,42 @@ TEST_BEGIN(test_large_extent_size) { */ sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); for (i = 0; i < nlextents; i++) { size_t lextent_size, extent_size, floor, ceil; mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); extent_size = cache_oblivious ? 
lextent_size + PAGE : lextent_size; floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); - assert_zu_eq(extent_size, floor, + expect_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); - assert_zu_eq(extent_size, ceil, + expect_zu_eq(extent_size, ceil, "Extent quantization should be a no-op for precise size " "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); if (i > 0) { - assert_zu_eq(extent_size_prev, + expect_zu_eq(extent_size_prev, sz_psz_quantize_floor(extent_size - PAGE), "Floor should be a precise size"); if (extent_size_prev < ceil_prev) { - assert_zu_eq(ceil_prev, extent_size, + expect_zu_eq(ceil_prev, extent_size, "Ceiling should be a precise size " "(extent_size_prev=%zu, ceil_prev=%zu, " "extent_size=%zu)", extent_size_prev, @@ -112,17 +112,17 @@ TEST_BEGIN(test_monotonic) { floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); - assert_zu_le(floor, extent_size, + expect_zu_le(floor, extent_size, "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", floor, extent_size, ceil); - assert_zu_ge(ceil, extent_size, + expect_zu_ge(ceil, extent_size, "Ceiling should be >= (floor=%zu, extent_size=%zu, " "ceil=%zu)", floor, extent_size, ceil); - assert_zu_le(floor_prev, floor, "Floor should be monotonic " + expect_zu_le(floor_prev, floor, "Floor should be monotonic " "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", floor_prev, floor, extent_size, ceil); - assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + expect_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", floor, extent_size, ceil_prev, ceil); diff --git a/test/unit/fork.c b/test/unit/fork.c index b169075..4137423 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -36,25 +36,25 @@ TEST_BEGIN(test_fork) { /* Set up a manually managed arena for test. */ unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); /* Migrate to the new arena. 
*/ unsigned old_arena_ind; sz = sizeof(old_arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); pid = fork(); free(p); p = malloc(64); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); free(p); if (pid == -1) { diff --git a/test/unit/hash.c b/test/unit/hash.c index 7cc034f..49f0823 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -131,7 +131,7 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { default: not_reached(); } - assert_u32_eq(computed, expected, + expect_u32_eq(computed, expected, "Hash mismatch for %s(): expected %#x but got %#x", hash_variant_string(variant), expected, computed); } diff --git a/test/unit/hook.c b/test/unit/hook.c index 36dcb89..16a6f1b 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -70,10 +70,10 @@ set_args_raw(uintptr_t *args_raw, int nargs) { } static void -assert_args_raw(uintptr_t *args_raw_expected, int nargs) { +expect_args_raw(uintptr_t *args_raw_expected, int nargs) { int cmp = memcmp(args_raw_expected, arg_args_raw, sizeof(uintptr_t) * nargs); - assert_d_eq(cmp, 0, "Raw args mismatch"); + expect_d_eq(cmp, 0, "Raw args mismatch"); } static void @@ -132,34 +132,34 @@ TEST_BEGIN(test_hooks_basic) { reset_args(); hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333, args_raw); - assert_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); - assert_d_eq((int)hook_alloc_posix_memalign, arg_type, + expect_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); + expect_d_eq((int)hook_alloc_posix_memalign, arg_type, "Passed wrong alloc type"); - assert_ptr_eq((void *)222, arg_result, "Passed wrong result address"); - assert_u64_eq(333, arg_result_raw, "Passed wrong result"); - assert_args_raw(args_raw, 3); + expect_ptr_eq((void *)222, arg_result, "Passed wrong result address"); + expect_u64_eq(333, arg_result_raw, "Passed wrong result"); + expect_args_raw(args_raw, 3); /* Dalloc */ reset_args(); hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw); - assert_d_eq((int)hook_dalloc_sdallocx, arg_type, + expect_d_eq((int)hook_dalloc_sdallocx, arg_type, "Passed wrong dalloc type"); - assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); - assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); - assert_args_raw(args_raw, 3); + expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); + expect_args_raw(args_raw, 3); /* Expand */ reset_args(); hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555, args_raw); - assert_d_eq((int)hook_expand_xallocx, arg_type, + expect_d_eq((int)hook_expand_xallocx, arg_type, "Passed wrong expand type"); - assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); - assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); - assert_zu_eq(333, arg_old_usize, "Passed wrong old usize"); - assert_zu_eq(444, arg_new_usize, "Passed wrong new usize"); - assert_zu_eq(555, arg_result_raw, "Passed wrong result"); - assert_args_raw(args_raw, 4); + expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); + expect_zu_eq(333, 
arg_old_usize, "Passed wrong old usize"); + expect_zu_eq(444, arg_new_usize, "Passed wrong new usize"); + expect_zu_eq(555, arg_result_raw, "Passed wrong result"); + expect_args_raw(args_raw, 4); hook_remove(TSDN_NULL, handle); } @@ -177,24 +177,24 @@ TEST_BEGIN(test_hooks_null) { void *handle3 = hook_install(TSDN_NULL, &hooks3); void *handle4 = hook_install(TSDN_NULL, &hooks4); - assert_ptr_ne(handle1, NULL, "Hook installation failed"); - assert_ptr_ne(handle2, NULL, "Hook installation failed"); - assert_ptr_ne(handle3, NULL, "Hook installation failed"); - assert_ptr_ne(handle4, NULL, "Hook installation failed"); + expect_ptr_ne(handle1, NULL, "Hook installation failed"); + expect_ptr_ne(handle2, NULL, "Hook installation failed"); + expect_ptr_ne(handle3, NULL, "Hook installation failed"); + expect_ptr_ne(handle4, NULL, "Hook installation failed"); uintptr_t args_raw[4] = {10, 20, 30, 40}; call_count = 0; hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); call_count = 0; hook_invoke_dalloc(hook_dalloc_free, NULL, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); call_count = 0; hook_invoke_expand(hook_expand_realloc, NULL, 0, 0, 0, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); hook_remove(TSDN_NULL, handle1); hook_remove(TSDN_NULL, handle2); @@ -206,16 +206,16 @@ TEST_END TEST_BEGIN(test_hooks_remove) { hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); call_count = 0; uintptr_t args_raw[4] = {10, 20, 30, 40}; hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); - assert_d_eq(call_count, 1, "Hook not invoked"); + expect_d_eq(call_count, 1, "Hook not invoked"); call_count = 0; hook_remove(TSDN_NULL, handle); hook_invoke_alloc(hook_alloc_malloc, NULL, 0, NULL); - assert_d_eq(call_count, 0, "Hook invoked after removal"); + expect_d_eq(call_count, 0, "Hook invoked after removal"); } TEST_END @@ -224,7 +224,7 @@ TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); /* Stop malloc from being optimized away. 
*/ volatile int err; @@ -233,69 +233,69 @@ TEST_BEGIN(test_hooks_alloc_simple) { /* malloc */ reset(); ptr = malloc(1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); /* posix_memalign */ reset(); err = posix_memalign((void **)&ptr, 1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_posix_memalign, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_posix_memalign, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); + expect_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); free(ptr); /* aligned_alloc */ reset(); ptr = aligned_alloc(1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_aligned_alloc, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); /* calloc */ reset(); ptr = calloc(11, 13); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); - 
assert_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); free(ptr); /* memalign */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN reset(); ptr = memalign(1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_MEMALIGN */ @@ -303,27 +303,27 @@ TEST_BEGIN(test_hooks_alloc_simple) { #ifdef JEMALLOC_OVERRIDE_VALLOC reset(); ptr = valloc(1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ /* mallocx */ reset(); ptr = mallocx(1, MALLOCX_LG_ALIGN(10)); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], "Wrong flags"); free(ptr); @@ -335,7 +335,7 @@ TEST_BEGIN(test_hooks_dalloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -343,35 +343,35 @@ TEST_BEGIN(test_hooks_dalloc_simple) { reset(); ptr = malloc(1); free(ptr); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_free, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_free, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); /* dallocx() */ reset(); ptr = malloc(1); dallocx(ptr, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], "Wrong raw arg"); /* sdallocx() */ reset(); ptr = malloc(1); sdallocx(ptr, 1, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_sdallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); - assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_sdallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); + expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], "Wrong raw arg"); hook_remove(TSDN_NULL, handle); @@ -382,7 +382,7 @@ TEST_BEGIN(test_hooks_expand_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -390,17 +390,17 @@ TEST_BEGIN(test_hooks_expand_simple) { reset(); ptr = malloc(1); size_t new_usize = xallocx(ptr, 100, 200, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_expand_xallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer expanded"); - assert_u64_eq(arg_old_usize, nallocx(1, 0), "Wrong old usize"); - assert_u64_eq(arg_new_usize, sallocx(ptr, 0), "Wrong new usize"); - assert_u64_eq(new_usize, arg_result_raw, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong arg"); - assert_u64_eq(100, arg_args_raw[1], "Wrong arg"); - assert_u64_eq(200, arg_args_raw[2], "Wrong arg"); - assert_u64_eq(MALLOCX_TCACHE_NONE, arg_args_raw[3], "Wrong arg"); + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_expand_xallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer expanded"); + expect_u64_eq(arg_old_usize, nallocx(1, 0), "Wrong old usize"); + expect_u64_eq(arg_new_usize, sallocx(ptr, 0), "Wrong new usize"); + expect_u64_eq(new_usize, arg_result_raw, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong arg"); + expect_u64_eq(100, arg_args_raw[1], "Wrong arg"); + expect_u64_eq(200, arg_args_raw[2], "Wrong arg"); + expect_u64_eq(MALLOCX_TCACHE_NONE, arg_args_raw[3], "Wrong arg"); hook_remove(TSDN_NULL, handle); } @@ -410,21 +410,21 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; /* realloc(NULL, size) as malloc */ reset(); ptr = realloc(NULL, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); /* realloc(ptr, 0) as free */ @@ -432,29 +432,29 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { ptr = malloc(1); reset(); realloc(ptr, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_realloc, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, 
arg_address, + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); } /* realloc(NULL, 0) as malloc(0) */ reset(); ptr = realloc(NULL, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); free(ptr); hook_remove(TSDN_NULL, handle); @@ -467,7 +467,7 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; void *volatile ptr2; @@ -476,16 +476,16 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = malloc(129); reset(); ptr2 = ralloc(ptr, 130, flags); - assert_ptr_eq(ptr, ptr2, "Small realloc moved"); + expect_ptr_eq(ptr, ptr2, "Small realloc moved"); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, expand_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, expand_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong address"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); free(ptr); /* @@ -499,19 +499,19 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = ralloc(ptr2, 2 * 1024 * 1024, flags); /* ptr is the new address, ptr2 is the old address. 
*/ if (ptr == ptr2) { - assert_d_eq(call_count, 1, "Hook not called"); - assert_d_eq(arg_type, expand_type, "Wrong hook type"); + expect_d_eq(call_count, 1, "Hook not called"); + expect_d_eq(arg_type, expand_type, "Wrong hook type"); } else { - assert_d_eq(call_count, 2, "Wrong hooks called"); - assert_ptr_eq(ptr, arg_result, "Wrong address"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_d_eq(call_count, 2, "Wrong hooks called"); + expect_ptr_eq(ptr, arg_result, "Wrong address"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); } - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_ptr_eq(ptr2, arg_address, "Wrong address"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_ptr_eq(ptr2, arg_address, "Wrong address"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + expect_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -519,34 +519,34 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = malloc(8); reset(); ptr2 = ralloc(ptr, 128, flags); - assert_ptr_ne(ptr, ptr2, "Small realloc didn't move"); - - assert_d_eq(call_count, 2, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_ptr_eq(ptr2, arg_result, "Wrong address"); - assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + expect_ptr_ne(ptr, ptr2, "Small realloc didn't move"); + + expect_d_eq(call_count, 2, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong address"); + expect_ptr_eq(ptr2, arg_result, "Wrong address"); + expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); free(ptr2); /* Realloc with move, large. 
*/ ptr = malloc(1); reset(); ptr2 = ralloc(ptr, 2 * 1024 * 1024, flags); - assert_ptr_ne(ptr, ptr2, "Large realloc didn't move"); - - assert_d_eq(call_count, 2, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_ptr_eq(ptr2, arg_result, "Wrong address"); - assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + expect_ptr_ne(ptr, ptr2, "Large realloc didn't move"); + + expect_d_eq(call_count, 2, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong address"); + expect_ptr_eq(ptr2, arg_result, "Wrong address"); + expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr2); diff --git a/test/unit/huge.c b/test/unit/huge.c index ab72cf0..ec64e50 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -11,37 +11,37 @@ TEST_BEGIN(huge_bind_thread) { size_t sz = sizeof(unsigned); /* Bind to a manual arena. */ - assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, sizeof(arena1)), 0, "Fail to bind thread"); void *ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena1, arena2, "Wrong arena used after binding"); + expect_u_eq(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); /* Switch back to arena 0. */ test_skip_if(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); arena2 = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, sizeof(arena2)), 0, "Fail to bind thread"); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena2, 0, "Wrong arena used after binding"); + expect_u_eq(arena2, 0, "Wrong arena used after binding"); dallocx(ptr, MALLOCX_TCACHE_NONE); /* Then huge allocation should use the huge arena. 
*/ ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena2, 0, "Wrong arena used after binding"); - assert_u_ne(arena1, arena2, "Wrong arena used after binding"); + expect_u_ne(arena2, 0, "Wrong arena used after binding"); + expect_u_ne(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); } TEST_END @@ -50,22 +50,22 @@ TEST_BEGIN(huge_mallocx) { unsigned arena1, arena2; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1)); - assert_ptr_not_null(huge, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, + expect_ptr_not_null(huge, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, sizeof(huge)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena1, arena2, "Wrong arena used for mallocx"); + expect_u_eq(arena1, arena2, "Wrong arena used for mallocx"); dallocx(huge, MALLOCX_ARENA(arena1)); void *huge2 = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(huge, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, + expect_ptr_not_null(huge, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, sizeof(huge2)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, + expect_u_ne(arena1, arena2, "Huge allocation should not come from the manual arena."); - assert_u_ne(arena2, 0, + expect_u_ne(arena2, 0, "Huge allocation should not come from the arena 0."); dallocx(huge2, 0); } @@ -75,25 +75,25 @@ TEST_BEGIN(huge_allocation) { unsigned arena1, arena2; void *ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_ptr_not_null(ptr, "Fail to allocate huge size"); size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); + expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); ptr = mallocx(HUGE_SZ >> 1, 0); - assert_ptr_not_null(ptr, "Fail to allocate half huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate half huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, "Wrong arena used for half huge"); + expect_u_ne(arena1, arena2, "Wrong arena used for half huge"); dallocx(ptr, 0); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(ptr, "Fail to allocate small size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate small size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, + expect_u_ne(arena1, arena2, "Huge and small should be from different arenas"); dallocx(ptr, 0); } diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 4de0b04..41ef6c2 100644 
--- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" #define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ + expect_d_eq(mallctl("experimental.utilization." node, \ a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ + expect_zu_eq(out_sz, out_sz_ref, \ "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + expect_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content touched when given invalid arguments"); \ } while (0) @@ -15,11 +15,11 @@ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) #define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ + expect_d_eq(mallctl("experimental.utilization." node, \ out, &out_sz, in, in_sz), 0, \ "Should return 0 on correct arguments"); \ - assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content should be changed"); \ } while (0) @@ -43,11 +43,11 @@ TEST_BEGIN(test_query) { void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(out, + expect_ptr_not_null(out, "test output allocation failed"); - assert_ptr_not_null(out_ref, + expect_ptr_not_null(out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) @@ -83,60 +83,60 @@ TEST_BEGIN(test_query) { /* Examine output for valid call */ TEST_UTIL_VALID("query"); - assert_zu_le(sz, SIZE_READ(out), + expect_zu_le(sz, SIZE_READ(out), "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, + expect_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out), NREGS_READ(out), + expect_zu_le(NFREE_READ(out), NREGS_READ(out), "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out), SIZE_READ(out), + expect_zu_le(NREGS_READ(out), SIZE_READ(out), "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out), 0, + expect_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) + expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { - assert_zu_le(BIN_NFREE_READ(out), + expect_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), "Bin free count exceeded region count"); - assert_zu_ne(BIN_NREGS_READ(out), 0, + expect_zu_ne(BIN_NREGS_READ(out), 0, "Bin region count must be positive"); - assert_zu_le(NFREE_READ(out), + expect_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), "Extent free count exceeded bin free count"); - assert_zu_le(NREGS_READ(out), + expect_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), "Extent region count exceeded " "bin region count"); - assert_zu_eq(BIN_NREGS_READ(out) + expect_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, "Bin region count isn't a multiple of " "extent region count"); - assert_zu_le( + expect_zu_le( BIN_NFREE_READ(out) - NFREE_READ(out), BIN_NREGS_READ(out) - NREGS_READ(out), "Free count in other extents in the bin " "exceeded region count in other extents " "in the bin"); - 
assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + expect_zu_le(NREGS_READ(out) - NFREE_READ(out), BIN_NREGS_READ(out) - BIN_NFREE_READ(out), "Extent utilized count exceeded " "bin utilized count"); } } else { - assert_zu_eq(NFREE_READ(out), 0, + expect_zu_eq(NFREE_READ(out), 0, "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out), 1, + expect_zu_eq(NREGS_READ(out), 1, "Extent region count should be one"); - assert_ptr_null(SLABCUR_READ(out), + expect_ptr_null(SLABCUR_READ(out), "Current slab must be null for large size classes"); if (config_stats) { - assert_zu_eq(BIN_NFREE_READ(out), 0, + expect_zu_eq(BIN_NFREE_READ(out), 0, "Bin free count must be zero for " "large sizes"); - assert_zu_eq(BIN_NREGS_READ(out), 0, + expect_zu_eq(BIN_NREGS_READ(out), 0, "Bin region count must be zero for " "large sizes"); } @@ -174,8 +174,8 @@ TEST_BEGIN(test_batch) { size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(q, "test pointer allocation failed"); + expect_ptr_not_null(p, "test pointer allocation failed"); + expect_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, @@ -201,7 +201,7 @@ TEST_BEGIN(test_batch) { /* Examine output for valid calls */ #define TEST_EQUAL_REF(i, message) \ - assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + expect_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] #define NREGS_READ(out, i) out[(i) * 3 + 1] @@ -210,21 +210,21 @@ TEST_BEGIN(test_batch) { out_sz_ref = out_sz /= 2; in_sz /= 2; TEST_UTIL_BATCH_VALID; - assert_zu_le(sz, SIZE_READ(out, 0), + expect_zu_le(sz, SIZE_READ(out, 0), "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, + expect_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + expect_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + expect_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out, 0), 0, + expect_zu_ne(NREGS_READ(out, 0), 0, "Extent region count must be positive"); } else { - assert_zu_eq(NFREE_READ(out, 0), 0, + expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out, 0), 1, + expect_zu_eq(NREGS_READ(out, 0), 1, "Extent region count should be one"); } TEST_EQUAL_REF(1, @@ -236,15 +236,15 @@ TEST_BEGIN(test_batch) { TEST_UTIL_BATCH_VALID; TEST_EQUAL_REF(0, "Statistics should be stable across calls"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + expect_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), "Extent free count exceeded region count"); } else { - assert_zu_eq(NFREE_READ(out, 0), 0, + expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); } - assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + expect_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), "Extent region count should be same for same region size"); - assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + expect_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), "Extent size should be same for same region size"); #undef SIZE_READ @@ -261,7 +261,7 @@ TEST_END int main(void) { - 
assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + expect_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, "Test case cannot cover large classes"); return test(test_query, test_batch); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 57e3ad4..772a0b4 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -20,7 +20,7 @@ arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -35,7 +35,7 @@ large_dalloc_junk_intercept(void *ptr, size_t usize) { large_dalloc_junk_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -68,22 +68,22 @@ test_junk(size_t sz_min, size_t sz_max) { sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], 'a', + expect_u_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], 'a', + expect_u_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, + expect_u_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } @@ -94,14 +94,14 @@ test_junk(size_t sz_min, size_t sz_max) { uint8_t *t; watch_junking(s); t = (uint8_t *)rallocx(s, sz+1, 0); - assert_ptr_not_null((void *)t, + expect_ptr_not_null((void *)t, "Unexpected rallocx() failure"); - assert_zu_ge(sallocx(t, 0), sz+1, + expect_zu_ge(sallocx(t, 0), sz+1, "Unexpectedly small rallocx() result"); if (!background_thread_enabled()) { - assert_ptr_ne(s, t, + expect_ptr_ne(s, t, "Unexpected in-place rallocx()"); - assert_true(!opt_junk_free || saw_junking, + expect_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be " "junk-filled", sz); } @@ -111,7 +111,7 @@ test_junk(size_t sz_min, size_t sz_max) { watch_junking(s); dallocx(s, 0); - assert_true(!opt_junk_free || saw_junking, + expect_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be junk-filled", sz); if (opt_junk_free) { diff --git a/test/unit/log.c b/test/unit/log.c index 10f45bc..02e6a6a 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -30,7 +30,7 @@ expect_no_logging(const char *names) { count++; log_do_end(log_l2_a) } - assert_d_eq(count, 0, "Disabled logging not ignored!"); + expect_d_eq(count, 0, "Disabled logging not ignored!"); } TEST_BEGIN(test_log_disabled) { @@ -61,7 +61,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l1) } - assert_d_eq(count, 10, "Mis-logged!"); + expect_d_eq(count, 10, "Mis-logged!"); count = 0; update_log_var_names("l1.a"); @@ -70,7 +70,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l1_a) } - assert_d_eq(count, 10, "Mis-logged!"); + expect_d_eq(count, 10, "Mis-logged!"); count = 0; update_log_var_names("l1.a|abc|l2|def"); @@ -83,7 +83,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l2) } - assert_d_eq(count, 
20, "Mis-logged!"); + expect_d_eq(count, 20, "Mis-logged!"); } TEST_END @@ -133,7 +133,7 @@ TEST_BEGIN(test_log_enabled_indirect) { log_do_end(log_l2_b_b) } - assert_d_eq(count, 40, "Mis-logged!"); + expect_d_eq(count, 40, "Mis-logged!"); } TEST_END @@ -155,7 +155,7 @@ TEST_BEGIN(test_log_enabled_global) { count++; log_do_end(log_l2_a_a) } - assert_d_eq(count, 20, "Mis-logged!"); + expect_d_eq(count, 20, "Mis-logged!"); } TEST_END @@ -171,7 +171,7 @@ TEST_BEGIN(test_logs_if_no_init) { count++; log_do_end(l) } - assert_d_eq(count, 0, "Logging shouldn't happen if not initialized."); + expect_d_eq(count, 0, "Logging shouldn't happen if not initialized."); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 14c169b..e38723f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -7,25 +7,25 @@ TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; size_t sz; - assert_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, + expect_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, "mallctl() should return ENOENT for non-existent names"); - assert_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), + expect_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)+1), EINVAL, "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -35,7 +35,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, + expect_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, "mallctlnametomib() should return ENOENT for non-existent names"); } TEST_END @@ -47,30 +47,30 @@ TEST_BEGIN(test_mallctlbymib_errors) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("version", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("version", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", strlen("0.0.0")), EPERM, "mallctl() should return EPERM on " "attempt to write read-only value"); miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, 
sizeof(epoch)+1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } @@ -81,25 +81,25 @@ TEST_BEGIN(test_mallctl_read_write) { size_t sz = sizeof(old_epoch); /* Blind. */ - assert_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, + expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. */ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, + expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, (void *)&new_epoch, sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -109,10 +109,10 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) { miblen = 3; mib[3] = 42; - assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_zu_eq(miblen, 3, "Unexpected mib output length"); - assert_zu_eq(mib[3], 42, + expect_zu_eq(miblen, 3, "Unexpected mib output length"); + expect_zu_eq(mib[3], 42, "mallctlnametomib() wrote past the end of the input mib"); } TEST_END @@ -121,10 +121,10 @@ TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ + expect_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ - assert_b_eq(oldval, config_##config, "Incorrect config value"); \ - assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + expect_b_eq(oldval, config_##config, "Incorrect config value"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) TEST_MALLCTL_CONFIG(cache_oblivious, bool); @@ -152,9 +152,9 @@ TEST_BEGIN(test_mallctl_opt) { int expected = config_##config ? 
0 : ENOENT; \ int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ 0); \ - assert_d_eq(result, expected, \ + expect_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ - assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) TEST_MALLCTL_OPT(bool, abort, always); @@ -203,18 +203,18 @@ TEST_BEGIN(test_manpage_example) { size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < nbins; i++) { size_t bin_size; mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0), 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } @@ -226,9 +226,9 @@ TEST_BEGIN(test_tcache_none) { /* Allocate p and q. */ void *p0 = mallocx(42, 0); - assert_ptr_not_null(p0, "Unexpected mallocx() failure"); + expect_ptr_not_null(p0, "Unexpected mallocx() failure"); void *q = mallocx(42, 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); /* Deallocate p and q, but bypass the tcache for q. */ dallocx(p0, 0); @@ -236,8 +236,8 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. */ void *p1 = mallocx(42, 0); - assert_ptr_not_null(p1, "Unexpected mallocx() failure"); - assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); + expect_ptr_not_null(p1, "Unexpected mallocx() failure"); + expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); /* Clean up. */ dallocx(p1, MALLOCX_TCACHE_NONE); @@ -258,25 +258,25 @@ TEST_BEGIN(test_tcache) { /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + expect_d_eq(mallctl("tcache.destroy", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -284,12 +284,12 @@ TEST_BEGIN(test_tcache) { /* Cache some allocations. 
*/ for (i = 0; i < NTCACHES; i++) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); dallocx(ps[i], MALLOCX_TCACHE(tis[i])); qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", i); dallocx(qs[i], MALLOCX_TCACHE(tis[i])); } @@ -298,9 +298,9 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *p0 = ps[i]; ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); - assert_ptr_eq(ps[i], p0, + expect_ptr_eq(ps[i], p0, "Expected mallocx() to allocate cached region, i=%u", i); } @@ -308,9 +308,9 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *q0 = qs[i]; qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", + expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", i); - assert_ptr_eq(qs[i], q0, + expect_ptr_eq(qs[i], q0, "Expected rallocx() to allocate cached region, i=%u", i); /* Avoid undefined behavior in case of test failure. */ if (qs[i] == NULL) { @@ -323,14 +323,14 @@ TEST_BEGIN(test_tcache) { /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + expect_d_eq(mallctl("tcache.destroy", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -342,32 +342,32 @@ TEST_BEGIN(test_thread_arena) { const char *opa; size_t sz = sizeof(opa); - assert_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, + expect_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); if (opt_oversize_threshold != 0) { narenas--; } - assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); + expect_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); if (strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, "Unexpected mallctl() failure"); new_arena_ind = 0; - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, "Unexpected mallctl() failure"); } else { - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { - assert_d_eq(mallctl("thread.arena", + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), EPERM, "thread.arena 
ctl " "should not be allowed with percpu arena"); @@ -384,32 +384,32 @@ TEST_BEGIN(test_arena_i_initialized) { bool initialized; sz = sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); } mib[1] = MALLCTL_ARENAS_ALL; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_true(initialized, + expect_true(initialized, "Merged arena statistics should always be initialized"); /* Equivalent to the above but using mallctl() directly. */ sz = sizeof(initialized); - assert_d_eq(mallctl( + expect_d_eq(mallctl( "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", (void *)&initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_true(initialized, + expect_true(initialized, "Merged arena statistics should always be initialized"); } TEST_END @@ -418,17 +418,17 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.dirty_decay_ms", + expect_d_eq(mallctl("arena.0.dirty_decay_ms", (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -437,10 +437,10 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { dirty_decay_ms++) { ssize_t old_dirty_decay_ms; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", + expect_d_eq(mallctl("arena.0.dirty_decay_ms", (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arena.0.dirty_decay_ms"); } } @@ -450,17 +450,17 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -469,10 
+469,10 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arena.0.muzzy_decay_ms"); } } @@ -484,19 +484,19 @@ TEST_BEGIN(test_arena_i_purge) { size_t mib[3]; size_t miblen = 3; - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib[1] = MALLCTL_ARENAS_ALL; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } TEST_END @@ -507,19 +507,19 @@ TEST_BEGIN(test_arena_i_decay) { size_t mib[3]; size_t miblen = 3; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib[1] = MALLCTL_ARENAS_ALL; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } TEST_END @@ -531,40 +531,40 @@ TEST_BEGIN(test_arena_i_dss) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, 
"primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } TEST_END @@ -576,43 +576,43 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { bool retain_enabled; size_t sz = sizeof(retain_enabled); - assert_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), + expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); test_skip_if(!retain_enabled); sz = sizeof(default_limit); miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), + expect_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(default_limit, SC_LARGE_MAXCLASS, + expect_zu_eq(default_limit, SC_LARGE_MAXCLASS, "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), EFAULT, "Unexpected mallctl() success"); new_limit = PAGE + 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(old_limit, PAGE, + expect_zu_eq(old_limit, PAGE, "Unexpected value for retain_grow_limit"); /* Expect grow less than psize class 10. */ new_limit = sz_pind2sz(10) - 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(old_limit, sz_pind2sz(9), + expect_zu_eq(old_limit, sz_pind2sz(9), "Unexpected value for retain_grow_limit"); /* Restore to default. 
*/ - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, sizeof(default_limit)), 0, "Unexpected mallctl() failure"); } TEST_END @@ -621,17 +621,17 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.dirty_decay_ms", + expect_d_eq(mallctl("arenas.dirty_decay_ms", (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; - assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); @@ -640,10 +640,10 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { dirty_decay_ms++) { ssize_t old_dirty_decay_ms; - assert_d_eq(mallctl("arenas.dirty_decay_ms", + expect_d_eq(mallctl("arenas.dirty_decay_ms", (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arenas.dirty_decay_ms"); } } @@ -653,17 +653,17 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.muzzy_decay_ms", + expect_d_eq(mallctl("arenas.muzzy_decay_ms", (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); @@ -672,10 +672,10 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", + expect_d_eq(mallctl("arenas.muzzy_decay_ms", (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arenas.muzzy_decay_ms"); } } @@ -685,9 +685,9 @@ TEST_BEGIN(test_arenas_constants) { #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ + expect_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ 0), 0, "Unexpected mallctl() failure"); \ - assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); @@ -703,9 +703,9 @@ TEST_BEGIN(test_arenas_bin_constants) { #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ + expect_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ 
- assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); @@ -722,9 +722,9 @@ TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ + expect_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ - assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, @@ -738,16 +738,16 @@ TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_u_eq(narenas_before+1, narenas_after, + expect_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); - assert_u_eq(arena, narenas_after-1, "Unexpected arena index"); + expect_u_eq(arena, narenas_after-1, "Unexpected arena index"); } TEST_END @@ -756,13 +756,13 @@ TEST_BEGIN(test_arenas_lookup) { void *ptr; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); - assert_ptr_not_null(ptr, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + expect_ptr_not_null(ptr, "Unexpected mallocx() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena, arena1, "Unexpected arena index"); + expect_u_eq(arena, arena1, "Unexpected arena index"); dallocx(ptr, 0); } TEST_END @@ -778,18 +778,18 @@ TEST_BEGIN(test_prof_active) { size_t len = sizeof(bool); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT, + expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT, "Setting prof_active to true should fail when opt_prof is off"); old = true; - assert_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, + expect_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, "Setting prof_active to true should fail when opt_prof is off"); - assert_true(old, "old valud should not be touched when mallctl fails"); + expect_true(old, "old value should not be touched when mallctl fails"); active = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, "Setting prof_active to false should succeed when opt_prof is off"); - assert_d_eq(mallctl("prof.active", &old, &len, &active, len), 0, + expect_d_eq(mallctl("prof.active", &old, &len, &active, len), 0, "Setting prof_active to false should succeed when 
opt_prof is off"); - assert_false(old, "prof_active should be false when opt_prof is off"); + expect_false(old, "prof_active should be false when opt_prof is off"); } TEST_END @@ -797,7 +797,7 @@ TEST_BEGIN(test_stats_arenas) { #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ + expect_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ } while (0) @@ -831,21 +831,21 @@ TEST_BEGIN(test_hooks) { size_t sz = sizeof(handle); int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Hook installation failed"); - assert_ptr_ne(handle, NULL, "Hook installation gave null handle"); + expect_d_eq(err, 0, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation gave null handle"); void *ptr = mallocx(1, 0); - assert_true(hook_called, "Alloc hook not called"); + expect_true(hook_called, "Alloc hook not called"); hook_called = false; free(ptr); - assert_true(hook_called, "Free hook not called"); + expect_true(hook_called, "Free hook not called"); err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); hook_called = false; ptr = mallocx(1, 0); free(ptr); - assert_false(hook_called, "Hook called after removal"); + expect_false(hook_called, "Hook called after removal"); } TEST_END @@ -861,27 +861,27 @@ TEST_BEGIN(test_hooks_exhaustion) { handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Error installation hooks"); - assert_ptr_ne(handle, NULL, "Got NULL handle"); + expect_d_eq(err, 0, "Error installation hooks"); + expect_ptr_ne(handle, NULL, "Got NULL handle"); handles[i] = handle; } err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, EAGAIN, "Should have failed hook installation"); + expect_d_eq(err, EAGAIN, "Should have failed hook installation"); for (int i = 0; i < HOOK_MAX; i++) { err = mallctl("experimental.hooks.remove", NULL, NULL, &handles[i], sizeof(handles[i])); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); } /* Insertion failed, but then we removed some; it should work now. 
*/ handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Hook insertion failed"); - assert_ptr_ne(handle, NULL, "Got NULL handle"); + expect_d_eq(err, 0, "Hook insertion failed"); + expect_ptr_ne(handle, NULL, "Got NULL handle"); err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); } TEST_END @@ -901,25 +901,25 @@ TEST_BEGIN(test_thread_idle) { bool tcache_enabled = false; sz = sizeof(tcache_enabled); err = mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); test_skip_if(!tcache_enabled); size_t tcache_max; sz = sizeof(tcache_max); err = mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); test_skip_if(tcache_max == 0); unsigned arena_ind; sz = sizeof(arena_ind); err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); /* We're going to do an allocation of size 1, which we know is small. */ size_t mib[5]; miblen = sizeof(mib)/sizeof(mib[0]); err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); mib[2] = arena_ind; /* @@ -931,25 +931,25 @@ TEST_BEGIN(test_thread_idle) { uint64_t epoch; err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); uint64_t small_dalloc_pre_idle; sz = sizeof(small_dalloc_pre_idle); err = mallctlbymib(mib, miblen, &small_dalloc_pre_idle, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); err = mallctl("thread.idle", NULL, NULL, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); uint64_t small_dalloc_post_idle; sz = sizeof(small_dalloc_post_idle); err = mallctlbymib(mib, miblen, &small_dalloc_post_idle, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); - assert_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, + expect_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, "Purge didn't flush the tcache"); } TEST_END diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 79ba7fc..1a6e5f6 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -4,9 +4,9 @@ TEST_BEGIN(test_malloc_strtoumax_no_endptr) { int err; set_errno(0); - assert_ju_eq(malloc_strtoumax("0", NULL, 0), 0, "Unexpected result"); + expect_ju_eq(malloc_strtoumax("0", NULL, 0), 0, "Unexpected result"); err = get_errno(); - assert_d_eq(err, 0, "Unexpected failure"); + expect_d_eq(err, 0, "Unexpected failure"); } TEST_END @@ -89,14 +89,14 @@ TEST_BEGIN(test_malloc_strtoumax) { set_errno(0); result = malloc_strtoumax(test->input, &remainder, test->base); err = get_errno(); - assert_d_eq(err, test->expected_errno, + expect_d_eq(err, test->expected_errno, "Expected errno %s for \"%s\", base %d", test->expected_errno_name, test->input, test->base); - assert_str_eq(remainder, test->expected_remainder, + expect_str_eq(remainder, test->expected_remainder, "Unexpected remainder for \"%s\", base %d", test->input, test->base); if (err == 0) { - assert_ju_eq(result, test->expected_x, + expect_ju_eq(result, test->expected_x, "Unexpected result for \"%s\", base %d", test->input, test->base); } @@ -111,10 +111,10 @@ 
TEST_BEGIN(test_malloc_snprintf_truncated) { size_t len; #define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ - assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ + expect_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ buf, expected_str_untruncated); \ - assert_zu_eq(result, strlen(expected_str_untruncated), \ + expect_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -142,8 +142,8 @@ TEST_BEGIN(test_malloc_snprintf) { size_t result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ - assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ + expect_str_eq(buf, expected_str, "Unexpected output"); \ + expect_zu_eq(result, strlen(expected_str), "Unexpected result");\ } while (0) TEST("hello", "hello"); diff --git a/test/unit/math.c b/test/unit/math.c index 09ef20c..a32767c 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -41,7 +41,7 @@ TEST_BEGIN(test_ln_gamma_factorial) { /* exp(ln_gamma(x)) == (x-1)! for integer x. */ for (x = 1; x <= 21; x++) { - assert_true(double_eq_rel(exp(ln_gamma(x)), + expect_true(double_eq_rel(exp(ln_gamma(x)), (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR), "Incorrect factorial result for x=%u", x); } @@ -192,7 +192,7 @@ TEST_BEGIN(test_ln_gamma_misc) { for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { double x = (double)i * 0.25; - assert_true(double_eq_rel(ln_gamma(x), + expect_true(double_eq_rel(ln_gamma(x), ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect ln_gamma result for i=%u", i); } @@ -242,7 +242,7 @@ TEST_BEGIN(test_pt_norm) { for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { double p = (double)i * 0.01; - assert_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], + expect_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_norm result for i=%u", i); } @@ -295,7 +295,7 @@ TEST_BEGIN(test_pt_chi2) { double ln_gamma_df = ln_gamma(df * 0.5); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - assert_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), + expect_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_chi2 result for i=%u, j=%u", i, j); e++; @@ -356,7 +356,7 @@ TEST_BEGIN(test_pt_gamma_shape) { double ln_gamma_shape = ln_gamma(shape); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - assert_true(double_eq_rel(pt_gamma(p, shape, 1.0, + expect_true(double_eq_rel(pt_gamma(p, shape, 1.0, ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_gamma result for i=%u, j=%u", i, j); @@ -370,7 +370,7 @@ TEST_BEGIN(test_pt_gamma_scale) { double shape = 1.0; double ln_gamma_shape = ln_gamma(shape); - assert_true(double_eq_rel( + expect_true(double_eq_rel( pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, MAX_ABS_ERR), diff --git a/test/unit/mq.c b/test/unit/mq.c index 57a4d54..f833f77 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -13,17 +13,17 @@ TEST_BEGIN(test_mq_basic) { mq_t mq; mq_msg_t msg; - assert_false(mq_init(&mq), "Unexpected mq_init() failure"); - assert_u_eq(mq_count(&mq), 0, "mq should be empty"); - assert_ptr_null(mq_tryget(&mq), + expect_false(mq_init(&mq), "Unexpected mq_init() failure"); + 
expect_u_eq(mq_count(&mq), 0, "mq should be empty"); + expect_ptr_null(mq_tryget(&mq), "mq_tryget() should fail when the queue is empty"); mq_put(&mq, &msg); - assert_u_eq(mq_count(&mq), 1, "mq should contain one message"); - assert_ptr_eq(mq_tryget(&mq), &msg, "mq_tryget() should return msg"); + expect_u_eq(mq_count(&mq), 1, "mq should contain one message"); + expect_ptr_eq(mq_tryget(&mq), &msg, "mq_tryget() should return msg"); mq_put(&mq, &msg); - assert_ptr_eq(mq_get(&mq), &msg, "mq_get() should return msg"); + expect_ptr_eq(mq_get(&mq), &msg, "mq_get() should return msg"); mq_fini(&mq); } @@ -36,7 +36,7 @@ thd_receiver_start(void *arg) { for (i = 0; i < (NSENDERS * NMSGS); i++) { mq_msg_t *msg = mq_get(mq); - assert_ptr_not_null(msg, "mq_get() should never return NULL"); + expect_ptr_not_null(msg, "mq_get() should never return NULL"); dallocx(msg, 0); } return NULL; @@ -51,7 +51,7 @@ thd_sender_start(void *arg) { mq_msg_t *msg; void *p; p = mallocx(sizeof(mq_msg_t), 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); msg = (mq_msg_t *)p; mq_put(mq, msg); } @@ -64,7 +64,7 @@ TEST_BEGIN(test_mq_threaded) { thd_t senders[NSENDERS]; unsigned i; - assert_false(mq_init(&mq), "Unexpected mq_init() failure"); + expect_false(mq_init(&mq), "Unexpected mq_init() failure"); thd_create(&receiver, thd_receiver_start, (void *)&mq); for (i = 0; i < NSENDERS; i++) { diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 424587b..4aeebc1 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_mtx_basic) { mtx_t mtx; - assert_false(mtx_init(&mtx), "Unexpected mtx_init() failure"); + expect_false(mtx_init(&mtx), "Unexpected mtx_init() failure"); mtx_lock(&mtx); mtx_unlock(&mtx); mtx_fini(&mtx); @@ -36,7 +36,7 @@ TEST_BEGIN(test_mtx_race) { thd_t thds[NTHREADS]; unsigned i; - assert_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); + expect_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); arg.x = 0; for (i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, (void *)&arg); @@ -44,7 +44,7 @@ TEST_BEGIN(test_mtx_race) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } - assert_u_eq(arg.x, NTHREADS * NINCRS, + expect_u_eq(arg.x, NTHREADS * NINCRS, "Race-related counter corruption"); } TEST_END diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 5a736bb..bf87501 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -6,9 +6,9 @@ TEST_BEGIN(test_nstime_init) { nstime_t nst; nstime_init(&nst, 42000000043); - assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); + expect_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + expect_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + expect_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); } TEST_END @@ -16,8 +16,8 @@ TEST_BEGIN(test_nstime_init2) { nstime_t nst; nstime_init2(&nst, 42, 43); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); + expect_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + expect_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); } TEST_END @@ -27,8 +27,8 @@ TEST_BEGIN(test_nstime_copy) { nstime_init2(&nsta, 42, 43); nstime_init_zero(&nstb); nstime_copy(&nstb, &nsta); - assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); - 
assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); + expect_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); + expect_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); } TEST_END @@ -37,31 +37,31 @@ TEST_BEGIN(test_nstime_compare) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); - assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + expect_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); nstime_init2(&nstb, 42, 42); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, + expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, + expect_d_eq(nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 42, 44); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, + expect_d_eq(nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, + expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); nstime_init2(&nstb, 41, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, + expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, + expect_d_eq(nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 43, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, + expect_d_eq(nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, + expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); } TEST_END @@ -73,14 +73,14 @@ TEST_BEGIN(test_nstime_add) { nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 84, 86); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, BILLION - 1); nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 85, BILLION - 2); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -91,13 +91,13 @@ TEST_BEGIN(test_nstime_iadd) { nstime_init2(&nsta, 42, BILLION - 1); nstime_iadd(&nsta, 1); nstime_init2(&nstb, 43, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, 1); nstime_iadd(&nsta, BILLION + 1); nstime_init2(&nstb, 43, 2); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -109,14 +109,14 @@ TEST_BEGIN(test_nstime_subtract) { nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); nstime_init_zero(&nstb); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_init2(&nstb, 41, 44); nstime_subtract(&nsta, &nstb); nstime_init2(&nstb, 0, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -127,13 +127,13 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 42*BILLION + 43); nstime_init_zero(&nstb); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + 
expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 41*BILLION + 44); nstime_init2(&nstb, 0, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -144,13 +144,13 @@ TEST_BEGIN(test_nstime_imultiply) { nstime_init2(&nsta, 42, 43); nstime_imultiply(&nsta, 10); nstime_init2(&nstb, 420, 430); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); nstime_init2(&nsta, 42, 666666666); nstime_imultiply(&nsta, 3); nstime_init2(&nstb, 127, 999999998); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); } TEST_END @@ -162,14 +162,14 @@ TEST_BEGIN(test_nstime_idivide) { nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_idivide(&nsta, 10); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); nstime_init2(&nsta, 42, 666666666); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 3); nstime_idivide(&nsta, 3); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); } TEST_END @@ -180,7 +180,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + expect_u64_eq(nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); @@ -188,7 +188,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_add(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + expect_u64_eq(nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); @@ -196,7 +196,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_subtract(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + expect_u64_eq(nstime_divide(&nsta, &nstb), 9, "Incorrect division result"); } TEST_END @@ -209,7 +209,7 @@ TEST_END TEST_BEGIN(test_nstime_update) { nstime_t nst; - assert_false(nstime_init_update(&nst), "Basic time update failed."); + expect_false(nstime_init_update(&nst), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. 
*/ { @@ -220,9 +220,9 @@ TEST_BEGIN(test_nstime_update) { { nstime_t nst0; nstime_copy(&nst0, &nst); - assert_true(nstime_update(&nst), + expect_true(nstime_update(&nst), "Update should detect time roll-back."); - assert_d_eq(nstime_compare(&nst, &nst0), 0, + expect_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } } diff --git a/test/unit/pack.c b/test/unit/pack.c index fc188b0..e639282 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -22,7 +22,7 @@ binind_compute(void) { unsigned nbins, i; sz = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < nbins; i++) { @@ -30,12 +30,12 @@ binind_compute(void) { size_t miblen = sizeof(mib)/sizeof(size_t); size_t size; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)i; sz = sizeof(size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); if (size == SZ) { return i; @@ -54,11 +54,11 @@ nregs_per_run_compute(void) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); return nregs; } @@ -69,7 +69,7 @@ arenas_create_mallctl(void) { size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); return arena_ind; @@ -80,10 +80,10 @@ arena_reset_mallctl(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -105,7 +105,7 @@ TEST_BEGIN(test_pack) { for (j = 0; j < nregs_per_run; j++) { void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", SZ, arena_ind, i, j); @@ -148,7 +148,7 @@ TEST_BEGIN(test_pack) { } p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], + expect_ptr_eq(p, ptrs[(i * nregs_per_run) + j], "Unexpected refill discrepancy, run=%zu, reg=%zu\n", i, j); } diff --git a/test/unit/pages.c b/test/unit/pages.c index ee729ee..8dfd1a7 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -8,13 +8,13 @@ TEST_BEGIN(test_pages_huge) { alloc_size = HUGEPAGE * 2 - PAGE; commit = true; pages = pages_map(NULL, alloc_size, PAGE, &commit); - assert_ptr_not_null(pages, "Unexpected pages_map() error"); + expect_ptr_not_null(pages, "Unexpected pages_map() 
error"); if (init_system_thp_mode == thp_mode_default) { hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, + expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result"); - assert_false(pages_nohuge(hugepage, HUGEPAGE), + expect_false(pages_nohuge(hugepage, HUGEPAGE), "Unexpected pages_nohuge() result"); } diff --git a/test/unit/ph.c b/test/unit/ph.c index 88bf56f..0f7c991 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -30,8 +30,8 @@ node_cmp(const node_t *a, const node_t *b) { static int node_cmp_magic(const node_t *a, const node_t *b) { - assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); - assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); return node_cmp(a, b); } @@ -74,7 +74,7 @@ heap_print(const heap_t *heap) { for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; auxelm = phn_next_get(node_t, link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); @@ -90,7 +90,7 @@ node_validate(const node_t *node, const node_t *parent) { node_t *leftmost_child, *sibling; if (parent != NULL) { - assert_d_ge(node_cmp_magic(node, parent), 0, + expect_d_ge(node_cmp_magic(node, parent), 0, "Child is less than parent"); } @@ -98,13 +98,13 @@ node_validate(const node_t *node, const node_t *parent) { if (leftmost_child == NULL) { return nnodes; } - assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + expect_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), (void *)node, "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != NULL; sibling = phn_next_get(node_t, link, sibling)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); @@ -125,7 +125,7 @@ heap_validate(const heap_t *heap) { for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; auxelm = phn_next_get(node_t, link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); @@ -142,9 +142,9 @@ TEST_BEGIN(test_ph_empty) { heap_t heap; heap_new(&heap); - assert_true(heap_empty(&heap), "Heap should be empty"); - assert_ptr_null(heap_first(&heap), "Unexpected node"); - assert_ptr_null(heap_any(&heap), "Unexpected node"); + expect_true(heap_empty(&heap), "Heap should be empty"); + expect_ptr_null(heap_first(&heap), "Unexpected node"); + expect_ptr_null(heap_any(&heap), "Unexpected node"); } TEST_END @@ -203,7 +203,7 @@ TEST_BEGIN(test_ph_random) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. 
*/ heap_new(&heap); - assert_u_eq(heap_validate(&heap), 0, + expect_u_eq(heap_validate(&heap), 0, "Incorrect node count"); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; @@ -214,34 +214,34 @@ TEST_BEGIN(test_ph_random) { for (k = 0; k < j; k++) { heap_insert(&heap, &nodes[k]); if (i % 13 == 12) { - assert_ptr_not_null(heap_any(&heap), + expect_ptr_not_null(heap_any(&heap), "Heap should not be empty"); /* Trigger merging. */ - assert_ptr_not_null(heap_first(&heap), + expect_ptr_not_null(heap_first(&heap), "Heap should not be empty"); } - assert_u_eq(heap_validate(&heap), k + 1, + expect_u_eq(heap_validate(&heap), k + 1, "Incorrect node count"); } - assert_false(heap_empty(&heap), + expect_false(heap_empty(&heap), "Heap should not be empty"); /* Remove nodes. */ switch (i % 6) { case 0: for (k = 0; k < j; k++) { - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, &nodes[k]); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; case 1: for (k = j; k > 0; k--) { node_remove(&heap, &nodes[k-1]); - assert_u_eq(heap_validate(&heap), k - 1, + expect_u_eq(heap_validate(&heap), k - 1, "Incorrect node count"); } break; @@ -249,10 +249,10 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = node_remove_first(&heap); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); if (prev != NULL) { - assert_d_ge(node_cmp(node, + expect_d_ge(node_cmp(node, prev), 0, "Bad removal order"); } @@ -263,15 +263,15 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = heap_first(&heap); - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); if (prev != NULL) { - assert_d_ge(node_cmp(node, + expect_d_ge(node_cmp(node, prev), 0, "Bad removal order"); } node_remove(&heap, node); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); prev = node; } @@ -279,17 +279,17 @@ TEST_BEGIN(test_ph_random) { } case 4: { for (k = 0; k < j; k++) { node_remove_any(&heap); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; } case 5: { for (k = 0; k < j; k++) { node_t *node = heap_any(&heap); - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, node); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; @@ -297,11 +297,11 @@ TEST_BEGIN(test_ph_random) { not_reached(); } - assert_ptr_null(heap_first(&heap), + expect_ptr_null(heap_first(&heap), "Heap should be empty"); - assert_ptr_null(heap_any(&heap), + expect_ptr_null(heap_any(&heap), "Heap should be empty"); - assert_true(heap_empty(&heap), "Heap should be empty"); + expect_true(heap_empty(&heap), "Heap should be empty"); } } fini_gen_rand(sfmt); diff --git a/test/unit/prng.c b/test/unit/prng.c index b5795c2..915b350 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -10,18 +10,18 @@ test_prng_lg_range_u32(bool atomic) { ra = prng_lg_range_u32(&sa, 32, atomic); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_eq(ra, rb, + expect_u32_eq(ra, rb, "Repeated generation should produce repeated results"); 
atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sb, 32, atomic); - assert_u32_eq(ra, rb, + expect_u32_eq(ra, rb, "Equivalent generation should produce equivalent results"); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_u32(&sa, 32, atomic); rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_ne(ra, rb, + expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); @@ -29,9 +29,9 @@ test_prng_lg_range_u32(bool atomic) { for (lg_range = 31; lg_range > 0; lg_range--) { atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sb, lg_range, atomic); - assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u32_eq(rb, (ra >> (32 - lg_range)), + expect_u32_eq(rb, (ra >> (32 - lg_range)), "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } @@ -46,18 +46,18 @@ test_prng_lg_range_u64(void) { ra = prng_lg_range_u64(&sa, 64); sa = 42; rb = prng_lg_range_u64(&sa, 64); - assert_u64_eq(ra, rb, + expect_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_u64(&sb, 64); - assert_u64_eq(ra, rb, + expect_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u64(&sa, 64); rb = prng_lg_range_u64(&sa, 64); - assert_u64_ne(ra, rb, + expect_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; @@ -65,9 +65,9 @@ test_prng_lg_range_u64(void) { for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; rb = prng_lg_range_u64(&sb, lg_range); - assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + expect_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u64_eq(rb, (ra >> (64 - lg_range)), + expect_u64_eq(rb, (ra >> (64 - lg_range)), "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } @@ -83,18 +83,18 @@ test_prng_lg_range_zu(bool atomic) { ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, + expect_zu_eq(ra, rb, "Repeated generation should produce repeated results"); atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, + expect_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_ne(ra, rb, + expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); @@ -103,9 +103,9 @@ test_prng_lg_range_zu(bool atomic) { lg_range--) { atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, lg_range, atomic); - assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)), "Expected high order bits of full-width " "result, lg_range=%u", lg_range); } @@ -151,7 +151,7 @@ test_prng_range_u32(bool atomic) { for (rep = 0; rep < NREPS; rep++) { uint32_t r = 
prng_range_u32(&s, range, atomic); - assert_u32_lt(r, range, "Out of range"); + expect_u32_lt(r, range, "Out of range"); } } } @@ -171,7 +171,7 @@ test_prng_range_u64(void) { for (rep = 0; rep < NREPS; rep++) { uint64_t r = prng_range_u64(&s, range); - assert_u64_lt(r, range, "Out of range"); + expect_u64_lt(r, range, "Out of range"); } } } @@ -191,7 +191,7 @@ test_prng_range_zu(bool atomic) { for (rep = 0; rep < NREPS; rep++) { size_t r = prng_range_zu(&s, range, atomic); - assert_zu_lt(r, range, "Out of range"); + expect_zu_lt(r, range, "Out of range"); } } } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 2522006..8dfa678 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -10,7 +10,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -32,14 +32,14 @@ thd_start(void *varg) { void *p = alloc_from_permuted_backtrace(thd_ind, i); dallocx(p, 0); if (i % DUMP_INTERVAL == 0) { - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); } if (i % BT_COUNT_CHECK_INTERVAL == 0 || i+1 == NALLOCS_PER_THREAD) { bt_count = prof_bt_count(); - assert_zu_le(bt_count_prev+(i-i_prev), bt_count, + expect_zu_le(bt_count_prev+(i-i_prev), bt_count, "Expected larger backtrace count increase"); i_prev = i; bt_count_prev = bt_count; @@ -58,7 +58,7 @@ TEST_BEGIN(test_idump) { test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 850a24a..41c0512 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -6,9 +6,9 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) { size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, + expect_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); - assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, + expect_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); } @@ -19,11 +19,11 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, + expect_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); - assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, + expect_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, line, name); } @@ -67,11 +67,11 @@ prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { void *p; size_t expected_backtraces = expect_sample ? 
1 : 0; - assert_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, + expect_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, line); p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_zu_eq(prof_bt_count(), expected_backtraces, + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_zu_eq(prof_bt_count(), expected_backtraces, "%s():%d: Unexpected backtrace count", func, line); dallocx(p, 0); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index f7e0aac..4c6afbd 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -9,7 +9,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -22,7 +22,7 @@ TEST_BEGIN(test_gdump) { test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); @@ -30,35 +30,35 @@ TEST_BEGIN(test_gdump) { did_prof_dump_open = false; p = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); did_prof_dump_open = false; q = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_false(did_prof_dump_open, "Unexpected profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_false(did_prof_dump_open, "Unexpected profile dump"); gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); dallocx(p, 0); dallocx(q, 0); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 7a9b288..dfcc0ff 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -11,11 +11,11 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; const char filename_prefix[] = TEST_PREFIX "."; - assert_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) + expect_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) - 1), 0, "Dump file name should 
start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -30,11 +30,11 @@ TEST_BEGIN(test_idump) { active = true; - assert_d_eq(mallctl("prof.dump_prefix", NULL, NULL, + expect_d_eq(mallctl("prof.dump_prefix", NULL, NULL, (void *)&dump_prefix, sizeof(dump_prefix)), 0, "Unexpected mallctl failure while overwriting dump prefix"); - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); @@ -42,9 +42,9 @@ TEST_BEGIN(test_idump) { did_prof_dump_open = false; p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); dallocx(p, 0); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_true(did_prof_dump_open, "Expected a profile dump"); } TEST_END diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 4b14fd5..6b2336d 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,16 +4,16 @@ #define N_PARAM 100 #define N_THREADS 10 -static void assert_rep() { - assert_b_eq(prof_log_rep_check(), false, "Rep check failed"); +static void expect_rep() { + expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void assert_log_empty() { - assert_zu_eq(prof_log_bt_count(), 0, +static void expect_log_empty() { + expect_zu_eq(prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); - assert_zu_eq(prof_log_thr_count(), 0, + expect_zu_eq(prof_log_thr_count(), 0, "The log has threads; it isn't empty"); - assert_zu_eq(prof_log_alloc_count(), 0, + expect_zu_eq(prof_log_alloc_count(), 0, "The log has allocations; it isn't empty"); } @@ -35,22 +35,22 @@ TEST_BEGIN(test_prof_log_many_logs) { test_skip_if(!config_prof); for (i = 0; i < N_PARAM; i++) { - assert_b_eq(prof_log_is_logging(), false, + expect_b_eq(prof_log_is_logging(), false, "Logging shouldn't have started yet"); - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); - assert_b_eq(prof_log_is_logging(), true, + expect_b_eq(prof_log_is_logging(), true, "Logging should be started by now"); - assert_log_empty(); - assert_rep(); + expect_log_empty(); + expect_rep(); f(); - assert_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); - assert_rep(); - assert_b_eq(prof_log_is_logging(), true, + expect_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); + expect_rep(); + expect_b_eq(prof_log_is_logging(), true, "Logging should still be on"); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); - assert_b_eq(prof_log_is_logging(), false, + expect_b_eq(prof_log_is_logging(), false, "Logging should have turned off"); } } @@ -74,7 +74,7 @@ TEST_BEGIN(test_prof_log_many_threads) { test_skip_if(!config_prof); int i; - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); for (i = 0; i < N_THREADS; i++) { thd_create(&thr_buf[i], &f_thread, NULL); @@ -83,10 +83,10 @@ TEST_BEGIN(test_prof_log_many_threads) { for (i = 0; i < N_THREADS; i++) { thd_join(thr_buf[i], NULL); } - 
assert_zu_eq(prof_log_thr_count(), N_THREADS, + expect_zu_eq(prof_log_thr_count(), N_THREADS, "Wrong number of thread entries"); - assert_rep(); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_rep(); + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END @@ -111,19 +111,19 @@ TEST_BEGIN(test_prof_log_many_traces) { test_skip_if(!config_prof); - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); int i; - assert_rep(); - assert_log_empty(); + expect_rep(); + expect_log_empty(); for (i = 0; i < N_PARAM; i++) { - assert_rep(); + expect_rep(); f1(); - assert_rep(); + expect_rep(); f2(); - assert_rep(); + expect_rep(); f3(); - assert_rep(); + expect_rep(); } /* * There should be 8 total backtraces: two for malloc/free in f1(), two @@ -132,9 +132,9 @@ TEST_BEGIN(test_prof_log_many_traces) { * optimizations such as loop unrolling might generate more call sites. * So >= 8 traces are expected. */ - assert_zu_ge(prof_log_bt_count(), 8, + expect_zu_ge(prof_log_bt_count(), 8, "Expect at least 8 backtraces given sample workload"); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 962be74..7400d6c 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -8,14 +8,14 @@ /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup(tsd_t *tsd) { /* Options */ - assert_true(opt_prof, "opt_prof not on"); - assert_true(opt_prof_active, "opt_prof_active not on"); - assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + expect_true(opt_prof, "opt_prof not on"); + expect_true(opt_prof_active, "opt_prof_active not on"); + expect_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - assert_true(prof_active, "prof_active not on"); - assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + expect_true(prof_active, "prof_active not on"); + expect_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } @@ -35,11 +35,11 @@ TEST_BEGIN(test_prof_recent_off) { size_t len = len_ref; #define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ + expect_d_eq(mallctl("experimental.prof_recent." 
opt, a, b, c, \ d), ENOENT, "Should return ENOENT when config_prof is off");\ - assert_zd_eq(past, past_ref, "output was touched"); \ - assert_zu_eq(len, len_ref, "output length was touched"); \ - assert_zd_eq(future, future_ref, "input was touched"); \ + expect_zd_eq(past, past_ref, "output was touched"); \ + expect_zu_eq(len, len_ref, "output length was touched"); \ + expect_zd_eq(future, future_ref, "input was touched"); \ } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); @@ -61,35 +61,35 @@ TEST_BEGIN(test_prof_recent_on) { confirm_prof_setup(tsd); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); confirm_prof_setup(tsd); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); - assert_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); + expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, len), 0, "Write error"); future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); - assert_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); + expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), EINVAL, "Invalid write should return EINVAL"); - assert_zd_eq(past, OPT_ALLOC_MAX + 1, + expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); - assert_zd_eq(past, -1, "Wrong read result"); + expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX + 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len * 2), EINVAL, "Invalid write should return EINVAL"); - assert_zd_eq(past, -1, + expect_zd_eq(past, -1, "Output should not be touched given invalid write"); confirm_prof_setup(tsd); @@ -100,44 +100,44 @@ TEST_END #define NTH_REQ_SIZE(n) ((n) * 97 + 101) static void confirm_malloc(tsd_t *tsd, void *p) { - assert_ptr_not_null(p, "malloc failed unexpectedly"); + expect_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); - assert_ptr_not_null(e, "NULL edata for living pointer"); + expect_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); - assert_ptr_not_null(n, "Record in edata should not be NULL"); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n, "Record in edata should not be NULL"); + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_eq(e, n->alloc_edata, + expect_ptr_eq(e, n->alloc_edata, "edata pointer in record is not correct"); - assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + expect_ptr_null(n->dalloc_tctx, "dalloc_tctx 
in record should be NULL"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_zu_eq(n->size, NTH_REQ_SIZE(kth), + expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_not_null(n->alloc_edata, + expect_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); - assert_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), "Record in edata is not correct"); - assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_null(n->alloc_edata, + expect_ptr_null(n->alloc_edata, "Recorded edata should be NULL for released pointer"); - assert_ptr_not_null(n->dalloc_tctx, + expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); } @@ -167,7 +167,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_ne(prof_recent_alloc_begin(tsd), + expect_ptr_ne(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), @@ -194,7 +194,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -202,7 +202,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_prof_setup(tsd); b = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning off prof_active failed"); /* @@ -212,7 +212,7 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 3 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - assert_ptr_not_null(p, "malloc failed unexpectedly"); + expect_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); for (n = prof_recent_alloc_begin(tsd); @@ -223,13 +223,13 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } b = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning on prof_active failed"); confirm_prof_setup(tsd); @@ -267,14 +267,14 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of 
records. */ future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -286,7 +286,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -294,7 +294,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * the new limit is still no less than the length of the list. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -306,7 +306,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -314,7 +314,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * limit is less than the length of the list. */ future = OPT_ALLOC_MAX - 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -326,12 +326,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX - 1, + expect_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -343,12 +343,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX - 1, + expect_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. */ future = 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); n = prof_recent_alloc_begin(tsd); @@ -361,7 +361,7 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Completely turn off. */ future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); @@ -369,7 +369,7 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Restore the settings. 
*/ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); @@ -395,7 +395,7 @@ static void test_dump_write_cb(void *not_used, const char *str) { static void call_dump() { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", + expect_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); } @@ -418,9 +418,9 @@ static void confirm_record(const char *template, * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". */ - assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + expect_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); - assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + expect_d_eq(memcmp(dump_out + strlen(dump_out) - 2, template + strlen(template) - 2, 2), 0, DUMP_ERROR); const char *start = dump_out + strlen(template) - 2; @@ -429,14 +429,14 @@ static void confirm_record(const char *template, for (record = records; record < records + n_records; ++record) { #define ASSERT_CHAR(c) do { \ - assert_true(start < end, DUMP_ERROR); \ - assert_c_eq(*start++, c, DUMP_ERROR); \ + expect_true(start < end, DUMP_ERROR); \ + expect_c_eq(*start++, c, DUMP_ERROR); \ } while (0) #define ASSERT_STR(s) do { \ const size_t len = strlen(s); \ - assert_true(start + len <= end, DUMP_ERROR); \ - assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + expect_true(start + len <= end, DUMP_ERROR); \ + expect_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ start += len; \ } while (0) @@ -512,8 +512,8 @@ static void confirm_record(const char *template, #undef ASSERT_CHAR } - assert_ptr_eq(record, records + n_records, DUMP_ERROR); - assert_ptr_eq(start, end, DUMP_ERROR); + expect_ptr_eq(record, records + n_records, DUMP_ERROR); + expect_ptr_eq(start, end, DUMP_ERROR); } TEST_BEGIN(test_prof_recent_alloc_dump) { @@ -527,18 +527,18 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record_t records[2]; future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - assert_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", + expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; - assert_str_eq(dump_out, template, DUMP_ERROR); + expect_str_eq(dump_out, template, DUMP_ERROR); p = malloc(7); call_dump(); @@ -563,7 +563,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } @@ -632,7 +632,7 @@ static void *f_thread(void *arg) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max / 2); } - assert_zd_ge(last_max, -1, 
"Illegal last-N max"); + expect_zd_ge(last_max, -1, "Illegal last-N max"); } while (data_p->count > 0) { @@ -660,7 +660,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; @@ -673,7 +673,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 7cce42d..e643e54 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -5,14 +5,14 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure"); } @@ -21,7 +21,7 @@ get_lg_prof_sample(void) { size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, + expect_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return lg_prof_sample; @@ -29,10 +29,10 @@ get_lg_prof_sample(void) { static void do_prof_reset(size_t lg_prof_sample) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, + expect_d_eq(mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); - assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), + expect_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); } @@ -44,22 +44,22 @@ TEST_BEGIN(test_prof_reset_basic) { test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, + expect_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); - assert_zu_eq(lg_prof_sample_orig, 0, + expect_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); /* Test simple resets. 
*/ for (i = 0; i < 2; i++) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure while resetting profile data"); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected profile sample rate change"); } @@ -68,14 +68,14 @@ TEST_BEGIN(test_prof_reset_basic) { for (i = 0; i < 2; i++) { do_prof_reset(lg_prof_sample_next); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample, lg_prof_sample_next, + expect_zu_eq(lg_prof_sample, lg_prof_sample_next, "Expected profile sample rate change"); lg_prof_sample_next = lg_prof_sample_orig; } /* Make sure the test code restored prof.lg_sample. */ lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); } @@ -100,31 +100,31 @@ TEST_BEGIN(test_prof_reset_cleanup) { set_prof_active(true); - assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); prof_dump_header_orig = prof_dump_header; prof_dump_header = prof_dump_header_intercept; - assert_false(prof_dump_header_intercepted, "Unexpected intercept"); + expect_false(prof_dump_header_intercepted, "Unexpected intercept"); - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); - assert_true(prof_dump_header_intercepted, "Expected intercept"); - assert_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); + expect_true(prof_dump_header_intercepted, "Expected intercept"); + expect_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); - assert_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); - assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + expect_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); + expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); prof_dump_header = prof_dump_header_orig; dallocx(p, 0); - assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); set_prof_active(false); } @@ -145,13 +145,13 @@ thd_start(void *varg) { for (i = 0; i < NALLOCS_PER_THREAD; i++) { if (i % RESET_INTERVAL == 0) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile " "data"); } if (i % DUMP_INTERVAL == 0) { - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); } @@ -162,7 +162,7 @@ thd_start(void *varg) { *pp = NULL; } *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + 
i); - assert_ptr_not_null(*pp, + expect_ptr_not_null(*pp, "Unexpected btalloc() failure"); } } @@ -189,7 +189,7 @@ TEST_BEGIN(test_prof_reset) { test_skip_if(!config_prof); bt_count = prof_bt_count(); - assert_zu_eq(bt_count, 0, + expect_zu_eq(bt_count, 0, "Unexpected pre-existing tdata structures"); tdata_count = prof_tdata_count(); @@ -206,9 +206,9 @@ TEST_BEGIN(test_prof_reset) { thd_join(thds[i], NULL); } - assert_zu_eq(prof_bt_count(), bt_count, + expect_zu_eq(prof_bt_count(), bt_count, "Unexpected bactrace count change"); - assert_zu_eq(prof_tdata_count(), tdata_count, + expect_zu_eq(prof_tdata_count(), tdata_count, "Unexpected remaining tdata structures"); set_prof_active(false); @@ -246,19 +246,19 @@ TEST_BEGIN(test_xallocx) { /* Allocate small object (which will be promoted). */ p = ptrs[i] = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); /* Reset profiling. */ do_prof_reset(0); /* Perform successful xallocx(). */ sz = sallocx(p, 0); - assert_zu_eq(xallocx(p, sz, 0, 0), sz, + expect_zu_eq(xallocx(p, sz, 0, 0), sz, "Unexpected xallocx() failure"); /* Perform unsuccessful xallocx(). */ nsz = nallocx(sz+1, 0); - assert_zu_eq(xallocx(p, nsz, 0, 0), sz, + expect_zu_eq(xallocx(p, nsz, 0, 0), sz, "Unexpected xallocx() success"); } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4dde0ab..4bc597b 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -14,27 +14,27 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - assert_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); - assert_u64_eq(curobjs_0 + 1, curobjs_1, + expect_u64_eq(curobjs_0 + 1, curobjs_1, "Allocation should have increased sample size"); q = rallocx(p, 2048, flags); - assert_ptr_ne(p, q, "Expected move"); - assert_ptr_not_null(p, "Unexpected rmallocx() failure"); + expect_ptr_ne(p, q, "Expected move"); + expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - assert_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); - assert_u64_eq(curobjs_1, curobjs_2, + expect_u64_eq(curobjs_1, curobjs_2, "Reallocation should not have changed sample size"); dallocx(q, flags); prof_cnt_all(&curobjs_3, NULL, NULL, NULL); - assert_u64_eq(curobjs_0, curobjs_3, + expect_u64_eq(curobjs_0, curobjs_3, "Sample size should have returned to base level"); } TEST_END diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index c9c2a2b..4a9d38a 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -7,11 +7,11 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); - assert_str_eq(thread_name_old, thread_name_expected, + expect_str_eq(thread_name_old, 
thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); } #define mallctl_thread_name_get(a) \ @@ -20,7 +20,7 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); @@ -39,14 +39,14 @@ TEST_BEGIN(test_prof_thread_name_validation) { /* NULL input shouldn't be allowed. */ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. */ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -57,7 +57,7 @@ TEST_BEGIN(test_prof_thread_name_validation) { size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, (void *)&thread_name, sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " @@ -82,7 +82,7 @@ thd_start(void *varg) { mallctl_thread_name_set(thread_name); for (i = 0; i < NRESET; i++) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); mallctl_thread_name_get(thread_name); } diff --git a/test/unit/ql.c b/test/unit/ql.c index b76c24c..04da35f 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -18,21 +18,21 @@ test_empty_list(list_head_t *head) { list_t *t; unsigned i; - assert_ptr_null(ql_first(head), "Unexpected element for empty list"); - assert_ptr_null(ql_last(head, link), + expect_ptr_null(ql_first(head), "Unexpected element for empty list"); + expect_ptr_null(ql_last(head, link), "Unexpected element for empty list"); i = 0; ql_foreach(t, head, link) { i++; } - assert_u_eq(i, 0, "Unexpected element for empty list"); + expect_u_eq(i, 0, "Unexpected element for empty list"); i = 0; ql_reverse_foreach(t, head, link) { i++; } - assert_u_eq(i, 0, "Unexpected element for empty list"); + expect_u_eq(i, 0, "Unexpected element for empty list"); } TEST_BEGIN(test_ql_empty) { @@ -58,34 +58,34 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { list_t *t; unsigned i; - assert_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); - assert_c_eq(ql_last(head, link)->id, entries[nentries-1].id, + expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); + expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, "Element id mismatch"); i = 0; ql_foreach(t, head, link) { - assert_c_eq(t->id, entries[i].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i].id, "Element id mismatch"); i++; } i = 0; ql_reverse_foreach(t, head, link) { - assert_c_eq(t->id, entries[nentries-i-1].id, + expect_c_eq(t->id, entries[nentries-i-1].id, "Element id mismatch"); i++; } for (i = 0; i < nentries-1; i++) { t = ql_next(head, &entries[i], link); - assert_c_eq(t->id, entries[i+1].id, "Element id mismatch"); + 
expect_c_eq(t->id, entries[i+1].id, "Element id mismatch"); } - assert_ptr_null(ql_next(head, &entries[nentries-1], link), + expect_ptr_null(ql_next(head, &entries[nentries-1], link), "Unexpected element"); - assert_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); + expect_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); for (i = 1; i < nentries; i++) { t = ql_prev(head, &entries[i], link); - assert_c_eq(t->id, entries[i-1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i-1].id, "Element id mismatch"); } } diff --git a/test/unit/qr.c b/test/unit/qr.c index 271a109..95c1692 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -34,7 +34,7 @@ test_independent_entries(ring_t *entries) { qr_foreach(t, &entries[i], link) { j++; } - assert_u_eq(j, 1, + expect_u_eq(j, 1, "Iteration over single-element ring should visit precisely " "one element"); } @@ -43,19 +43,19 @@ test_independent_entries(ring_t *entries) { qr_reverse_foreach(t, &entries[i], link) { j++; } - assert_u_eq(j, 1, + expect_u_eq(j, 1, "Iteration over single-element ring should visit precisely " "one element"); } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_ptr_eq(t, &entries[i], + expect_ptr_eq(t, &entries[i], "Next element in single-element ring should be same as " "current element"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_ptr_eq(t, &entries[i], + expect_ptr_eq(t, &entries[i], "Previous element in single-element ring should be same as " "current element"); } @@ -77,7 +77,7 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(i+j) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -85,19 +85,19 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(NENTRIES+i-j-1) % + expect_c_eq(t->id, entries[(NENTRIES+i-j-1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, "Element id mismatch"); } } @@ -127,13 +127,13 @@ TEST_BEGIN(test_qr_remove) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[i+j].id, + expect_c_eq(t->id, entries[i+j].id, "Element id mismatch"); j++; } j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[NENTRIES - 1 - j].id, + expect_c_eq(t->id, entries[NENTRIES - 1 - j].id, "Element id mismatch"); j++; } @@ -155,7 +155,7 @@ TEST_BEGIN(test_qr_before_insert) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(NENTRIES+i-j) % + expect_c_eq(t->id, entries[(NENTRIES+i-j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -163,19 +163,19 @@ TEST_BEGIN(test_qr_before_insert) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_c_eq(t->id, 
entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, "Element id mismatch"); } } @@ -190,11 +190,11 @@ test_split_entries(ring_t *entries) { j = 0; qr_foreach(t, &entries[i], link) { if (i < SPLIT_INDEX) { - assert_c_eq(t->id, + expect_c_eq(t->id, entries[(i+j) % SPLIT_INDEX].id, "Element id mismatch"); } else { - assert_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % + expect_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id, "Element id mismatch"); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 65c0492..2509a6d 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -26,8 +26,8 @@ static int node_cmp(const node_t *a, const node_t *b) { int ret; - assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); - assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); ret = (a->key > b->key) - (a->key < b->key); if (ret == 0) { @@ -50,21 +50,21 @@ TEST_BEGIN(test_rb_empty) { tree_new(&tree); - assert_true(tree_empty(&tree), "Tree should be empty"); - assert_ptr_null(tree_first(&tree), "Unexpected node"); - assert_ptr_null(tree_last(&tree), "Unexpected node"); + expect_true(tree_empty(&tree), "Tree should be empty"); + expect_ptr_null(tree_first(&tree), "Unexpected node"); + expect_ptr_null(tree_last(&tree), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_search(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_search(&tree, &key), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_nsearch(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_nsearch(&tree, &key), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); } TEST_END @@ -88,17 +88,17 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { if (left_node != NULL) { - assert_false(rbtn_red_get(node_t, link, left_node), + expect_false(rbtn_red_get(node_t, link, left_node), "Node should be black"); } if (right_node != NULL) { - assert_false(rbtn_red_get(node_t, link, right_node), + expect_false(rbtn_red_get(node_t, link, right_node), "Node should be black"); } } /* Self. */ - assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ if (left_node != NULL) { @@ -122,21 +122,21 @@ tree_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; node_t *search_node; - assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Test rb_search(). */ search_node = tree_search(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_search() returned unexpected node"); /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_nsearch() returned unexpected node"); /* Test rb_psearch(). 
*/ search_node = tree_psearch(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_psearch() returned unexpected node"); (*i)++; @@ -174,14 +174,14 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); if (search_node != NULL) { - assert_u64_ge(search_node->key, node->key, + expect_u64_ge(search_node->key, node->key, "Key ordering error"); } /* Test rb_psearch(). */ search_node = tree_psearch(tree, node); if (search_node != NULL) { - assert_u64_le(search_node->key, node->key, + expect_u64_le(search_node->key, node->key, "Key ordering error"); } @@ -189,10 +189,10 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { rbtn_black_height(node_t, link, tree, black_height); imbalances = tree_recurse(tree->rbt_root, black_height, 0); - assert_u_eq(imbalances, 0, "Tree is unbalanced"); - assert_u_eq(tree_iterate(tree), nnodes-1, + expect_u_eq(imbalances, 0, "Tree is unbalanced"); + expect_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); - assert_u_eq(tree_iterate_reverse(tree), nnodes-1, + expect_u_eq(tree_iterate_reverse(tree), nnodes-1, "Unexpected node iteration count"); } @@ -220,7 +220,7 @@ static void destroy_cb(node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + expect_u_gt(*nnodes, 0, "Destruction removed too many nodes"); (*nnodes)--; } @@ -271,19 +271,19 @@ TEST_BEGIN(test_rb_random) { black_height); imbalances = tree_recurse(tree.rbt_root, black_height, 0); - assert_u_eq(imbalances, 0, + expect_u_eq(imbalances, 0, "Tree is unbalanced"); - assert_u_eq(tree_iterate(&tree), k+1, + expect_u_eq(tree_iterate(&tree), k+1, "Unexpected node iteration count"); - assert_u_eq(tree_iterate_reverse(&tree), k+1, + expect_u_eq(tree_iterate_reverse(&tree), k+1, "Unexpected node iteration count"); - assert_false(tree_empty(&tree), + expect_false(tree_empty(&tree), "Tree should not be empty"); - assert_ptr_not_null(tree_first(&tree), + expect_ptr_not_null(tree_first(&tree), "Tree should not be empty"); - assert_ptr_not_null(tree_last(&tree), + expect_ptr_not_null(tree_last(&tree), "Tree should not be empty"); tree_next(&tree, &nodes[k]); @@ -312,7 +312,7 @@ TEST_BEGIN(test_rb_random) { remove_iterate_cb, (void *)&nnodes); nnodes--; } while (start != NULL); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Removal terminated early"); break; } case 3: { @@ -326,13 +326,13 @@ TEST_BEGIN(test_rb_random) { (void *)&nnodes); nnodes--; } while (start != NULL); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Removal terminated early"); break; } case 4: { unsigned nnodes = j; tree_destroy(&tree, destroy_cb, &nnodes); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Destruction terminated early"); break; } default: diff --git a/test/unit/retained.c b/test/unit/retained.c index e6b6f7b..9ac83ef 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -14,7 +14,7 @@ static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); return arena_ind; @@ -26,17 +26,17 @@ do_arena_destroy(unsigned arena_ind) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } static void do_refresh(void) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } @@ -46,11 +46,11 @@ do_get_size_impl(const char *cmd, unsigned arena_ind) { size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = arena_ind; size_t size; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); return size; @@ -76,7 +76,7 @@ thd_start(void *arg) { next_epoch) { spin_adaptive(&spinner); } - assert_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); + expect_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); /* * Allocate. The main thread will reset the arena, so there's @@ -86,7 +86,7 @@ thd_start(void *arg) { void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE ); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx() failure\n"); } @@ -134,9 +134,9 @@ TEST_BEGIN(test_retained) { size_t allocated = esz * nthreads * PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); - assert_zu_le(allocated, active, "Unexpected active memory"); + expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); - assert_zu_le(active, mapped, "Unexpected mapped memory"); + expect_zu_le(active, mapped, "Unexpected mapped memory"); arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); size_t usable = 0; @@ -150,7 +150,7 @@ TEST_BEGIN(test_retained) { * Only consider size classes that wouldn't be skipped. */ if (psz_usable > 0) { - assert_zu_lt(usable, allocated, + expect_zu_lt(usable, allocated, "Excessive retained memory " "(%#zx[+%#zx] > %#zx)", usable, psz_usable, allocated); @@ -165,7 +165,7 @@ TEST_BEGIN(test_retained) { * (rather than retaining) during reset. 
*/ do_arena_destroy(arena_ind); - assert_u_eq(do_arena_create(NULL), arena_ind, + expect_u_eq(do_arena_create(NULL), arena_ind, "Unexpected arena index"); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b5ece82..3545cfc 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -13,14 +13,14 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_edata_read() should return NULL for empty tree"); base_delete(tsdn, base); @@ -42,27 +42,27 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, edata_szind_get(&edata_a), edata_slab_get(&edata_a)), "Unexpected rtree_write() failure"); rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, edata_szind_get(&edata_a), edata_slab_get(&edata_a)); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), &edata_a, "rtree_edata_read() should return previously set value"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), &edata_b, edata_szind_get_maybe_invalid(&edata_b), edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), true), &edata_b, "rtree_edata_read() should return previously set value"); @@ -73,7 +73,7 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; @@ -85,22 +85,22 @@ TEST_BEGIN(test_rtree_bits) { rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], &edata, SC_NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[j], true), &edata, "rtree_edata_read() 
should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); @@ -117,7 +117,7 @@ TEST_BEGIN(test_rtree_random) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; rtree_t *rtree = &test_rtree; @@ -128,23 +128,23 @@ TEST_BEGIN(test_rtree_random) { edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, keys[i], false, true); - assert_ptr_not_null(elm, + expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, false); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value, " "i=%u", i); @@ -152,12 +152,12 @@ TEST_BEGIN(test_rtree_random) { for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_edata_read() should return previously set value"); } diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c index bf4bd86..516a096 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -24,7 +24,7 @@ TEST_BEGIN(test_malloc_free_overflow) { free(ptr); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -40,7 +40,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { dallocx(ptr, 0); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -56,7 +56,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { sdallocx(ptr, 128, 0); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -73,7 +73,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(NULL); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check 
didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -90,7 +90,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(NULL); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -104,9 +104,9 @@ TEST_BEGIN(test_xallocx_overflow) { char* ptr = malloc(128); ptr[128] = 0; size_t result = xallocx(ptr, 129, 0, 0); - assert_zu_eq(result, 128, ""); + expect_zu_eq(result, 128, ""); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; safety_check_set_abort(NULL); } diff --git a/test/unit/sc.c b/test/unit/sc.c index bf51d8e..d207481 100644 --- a/test/unit/sc.c +++ b/test/unit/sc.c @@ -9,7 +9,7 @@ TEST_BEGIN(test_update_slab_size) { + (ZU(tiny->ndelta) << tiny->lg_delta); size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1; sc_data_update_slab_size(&data, tiny_size, tiny_size, (int)pgs_too_big); - assert_zu_lt((size_t)tiny->pgs, pgs_too_big, "Allowed excessive pages"); + expect_zu_lt((size_t)tiny->pgs, pgs_too_big, "Allowed excessive pages"); sc_data_update_slab_size(&data, 1, 10 * PAGE, 1); for (int i = 0; i < data.nbins; i++) { @@ -17,9 +17,9 @@ TEST_BEGIN(test_update_slab_size) { size_t reg_size = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); if (reg_size <= PAGE) { - assert_d_eq(sc->pgs, 1, "Ignored valid page size hint"); + expect_d_eq(sc->pgs, 1, "Ignored valid page size hint"); } else { - assert_d_gt(sc->pgs, 1, + expect_d_gt(sc->pgs, 1, "Allowed invalid page size hint"); } } diff --git a/test/unit/seq.c b/test/unit/seq.c index 19613b0..06ed683 100644 --- a/test/unit/seq.c +++ b/test/unit/seq.c @@ -15,10 +15,10 @@ set_data(data_t *data, int num) { } static void -assert_data(data_t *data) { +expect_data(data_t *data) { int num = data->arr[0]; for (int i = 0; i < 10; i++) { - assert_d_eq(num, data->arr[i], "Data consistency error"); + expect_d_eq(num, data->arr[i], "Data consistency error"); } } @@ -37,8 +37,8 @@ seq_reader_thd(void *arg) { while (iter < 1000 * 1000 - 1) { bool success = seq_try_load_data(&local_data, &thd_data->data); if (success) { - assert_data(&local_data); - assert_d_le(iter, local_data.arr[0], + expect_data(&local_data); + expect_d_le(iter, local_data.arr[0], "Seq read went back in time."); iter = local_data.arr[0]; } @@ -82,8 +82,8 @@ TEST_BEGIN(test_seq_simple) { seq_store_data(&seq, &data); set_data(&data, 0); bool success = seq_try_load_data(&data, &seq); - assert_b_eq(success, true, "Failed non-racing read"); - assert_data(&data); + expect_b_eq(success, true, "Failed non-racing read"); + expect_data(&data); } } TEST_END diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 6947336..c70eb59 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -7,16 +7,16 @@ get_max_size_class(void) { size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); mib[2] = 
nlextents - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, "Unexpected mallctlbymib() error"); return max_size_class; @@ -32,50 +32,50 @@ TEST_BEGIN(test_size_classes) { for (index = 0, size_class = sz_index2size(index); index < max_index || size_class < max_size_class; index++, size_class = sz_index2size(index)) { - assert_true(index < max_index, + expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - assert_true(size_class < max_size_class, + expect_true(size_class < max_size_class, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - assert_u_eq(index, sz_size2index(size_class), + expect_u_eq(index, sz_size2index(size_class), "sz_size2index() does not reverse sz_index2size(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", index, size_class, sz_size2index(size_class), sz_index2size(sz_size2index(size_class))); - assert_zu_eq(size_class, + expect_zu_eq(size_class, sz_index2size(sz_size2index(size_class)), "sz_index2size() does not reverse sz_size2index(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", index, size_class, sz_size2index(size_class), sz_index2size(sz_size2index(size_class))); - assert_u_eq(index+1, sz_size2index(size_class+1), + expect_u_eq(index+1, sz_size2index(size_class+1), "Next size_class does not round up properly"); - assert_zu_eq(size_class, (index > 0) ? + expect_zu_eq(size_class, (index > 0) ? sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class-1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class), + expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); - assert_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), + expect_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), "sz_s2u() does not round up to next size class"); } - assert_u_eq(index, sz_size2index(sz_index2size(index)), + expect_u_eq(index, sz_size2index(sz_index2size(index)), "sz_size2index() does not reverse sz_index2size()"); - assert_zu_eq(max_size_class, sz_index2size( + expect_zu_eq(max_size_class, sz_index2size( sz_size2index(max_size_class)), "sz_index2size() does not reverse sz_size2index()"); - assert_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), + expect_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class-1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class), + expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); } TEST_END @@ -90,53 +90,53 @@ TEST_BEGIN(test_psize_classes) { for (pind = 0, size_class = sz_pind2sz(pind); pind < max_pind || size_class < max_psz; pind++, size_class = sz_pind2sz(pind)) { - assert_true(pind < max_pind, + expect_true(pind < max_pind, "Loop conditionals should be equivalent; pind=%u, " "size_class=%zu (%#zx)", pind, size_class, size_class); - assert_true(size_class < max_psz, + expect_true(size_class < max_psz, "Loop conditionals should be equivalent; pind=%u, " "size_class=%zu (%#zx)", pind, size_class, size_class); - 
assert_u_eq(pind, sz_psz2ind(size_class), + expect_u_eq(pind, sz_psz2ind(size_class), "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); - assert_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), + expect_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); if (size_class == SC_LARGE_MAXCLASS) { - assert_u_eq(SC_NPSIZES, sz_psz2ind(size_class + 1), + expect_u_eq(SC_NPSIZES, sz_psz2ind(size_class + 1), "Next size_class does not round up properly"); } else { - assert_u_eq(pind + 1, sz_psz2ind(size_class + 1), + expect_u_eq(pind + 1, sz_psz2ind(size_class + 1), "Next size_class does not round up properly"); } - assert_zu_eq(size_class, (pind > 0) ? + expect_zu_eq(size_class, (pind > 0) ? sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class-1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class), + expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); - assert_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), + expect_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), "sz_psz2u() does not round up to next size class"); } - assert_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)), + expect_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)), "sz_psz2ind() does not reverse sz_pind2sz()"); - assert_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), + expect_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), "sz_pind2sz() does not reverse sz_psz2ind()"); - assert_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), + expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class-1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class), + expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); } TEST_END @@ -147,34 +147,34 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - assert_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, + expect_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, + expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, + expect_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_zu_eq(sz_s2u(max_size_class+1), 0, + expect_zu_eq(sz_s2u(max_size_class+1), 0, "sz_s2u() should return 0 for unsupported size"); - assert_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, + expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, "sz_s2u() should return 0 for unsupported size"); - assert_zu_eq(sz_s2u(SIZE_T_MAX), 0, + expect_zu_eq(sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - assert_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - 
assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, + expect_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_zu_eq(sz_psz2u(max_size_class+1), max_psz, + expect_zu_eq(sz_psz2u(max_size_class+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); - assert_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, + expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " "size"); - assert_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, + expect_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) on overflow"); } TEST_END diff --git a/test/unit/slab.c b/test/unit/slab.c index 5d2b35f..304a93d 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -14,12 +14,12 @@ TEST_BEGIN(test_arena_slab_regind) { bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); - assert_ptr_not_null(edata_addr_get(&slab), + expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); - assert_zu_eq(arena_slab_regind(&slab, binind, reg), + expect_zu_eq(arena_slab_regind(&slab, binind, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 7c5dbb7..588c9f4 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -26,9 +26,9 @@ TEST_BEGIN(test_smoothstep_integral) { max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); min = max - SMOOTHSTEP_NSTEPS; - assert_u64_ge(sum, min, + expect_u64_ge(sum, min, "Integral too small, even accounting for truncation"); - assert_u64_le(sum, max, "Integral exceeds 1/2"); + expect_u64_le(sum, max, "Integral exceeds 1/2"); if (false) { malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", max - sum, SMOOTHSTEP_NSTEPS); @@ -49,10 +49,10 @@ TEST_BEGIN(test_smoothstep_monotonic) { prev_h = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { uint64_t h = smoothstep_tab[i]; - assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); + expect_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); prev_h = h; } - assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1"); } TEST_END @@ -72,7 +72,7 @@ TEST_BEGIN(test_smoothstep_slope) { for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) { uint64_t h = smoothstep_tab[i]; uint64_t delta = h - prev_h; - assert_u64_ge(delta, prev_delta, + expect_u64_ge(delta, prev_delta, "Slope must monotonically increase in 0.0 <= x <= 0.5, " "i=%u", i); prev_h = h; @@ -84,7 +84,7 @@ TEST_BEGIN(test_smoothstep_slope) { for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { uint64_t h = smoothstep_tab[i]; uint64_t delta = prev_h - h; - assert_u64_ge(delta, prev_delta, + expect_u64_ge(delta, prev_delta, "Slope must monotonically decrease in 0.5 <= x <= 1.0, " "i=%u", i); prev_h = h; diff --git a/test/unit/stats.c b/test/unit/stats.c index 646768e..f4ac154 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -5,21 +5,21 @@ TEST_BEGIN(test_stats_summary) { int expected = config_stats ? 
0 : ENOENT; sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, + expect_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), + expect_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), + expect_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), + expect_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_le(allocated, active, + expect_zu_le(allocated, active, "allocated should be no larger than active"); - assert_zu_lt(active, resident, + expect_zu_lt(active, resident, "active should be less than resident"); - assert_zu_lt(active, mapped, + expect_zu_lt(active, mapped, "active should be less than mapped"); } } @@ -34,30 +34,30 @@ TEST_BEGIN(test_stats_large) { int expected = config_stats ? 0 : ENOENT; p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", + expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", + expect_d_eq(mallctl("stats.arenas.0.large.nrequests", (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_le(nmalloc, nrequests, + expect_u64_le(nmalloc, nrequests, "nmalloc should no larger than nrequests"); } @@ -75,54 +75,54 @@ TEST_BEGIN(test_stats_arenas_summary) { uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); - assert_ptr_not_null(little, "Unexpected mallocx() failure"); + expect_ptr_not_null(little, "Unexpected mallocx() failure"); large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); - assert_ptr_not_null(large, "Unexpected mallocx() failure"); + expect_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); dallocx(large, 0); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 
0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, + expect_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.dirty_npurge", + expect_d_eq(mallctl("stats.arenas.0.dirty_npurge", (void *)&dirty_npurge, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", + expect_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", (void *)&dirty_nmadvise, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.dirty_purged", + expect_d_eq(mallctl("stats.arenas.0.dirty_purged", (void *)&dirty_purged, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_npurge", + expect_d_eq(mallctl("stats.arenas.0.muzzy_npurge", (void *)&muzzy_npurge, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", + expect_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", (void *)&muzzy_nmadvise, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_purged", + expect_d_eq(mallctl("stats.arenas.0.muzzy_purged", (void *)&muzzy_purged, &sz, NULL, 0), expected, "Unexepected mallctl() result"); if (config_stats) { if (!background_thread_enabled()) { - assert_u64_gt(dirty_npurge + muzzy_npurge, 0, + expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } - assert_u64_le(dirty_nmadvise, dirty_purged, + expect_u64_le(dirty_nmadvise, dirty_purged, "dirty_nmadvise should be no greater than dirty_purged"); - assert_u64_le(muzzy_nmadvise, muzzy_purged, + expect_u64_le(muzzy_nmadvise, muzzy_purged, "muzzy_nmadvise should be no greater than muzzy_purged"); } } @@ -150,35 +150,35 @@ TEST_BEGIN(test_stats_arenas_small) { no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ p = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 
0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", + expect_d_eq(mallctl("stats.arenas.0.small.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", + expect_d_eq(mallctl("stats.arenas.0.small.nrequests", (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be no greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, + expect_u64_gt(nrequests, 0, "nrequests should be greater than zero"); } @@ -193,27 +193,27 @@ TEST_BEGIN(test_stats_arenas_large) { int expected = config_stats ? 0 : ENOENT; p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", + expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -234,85 +234,85 @@ TEST_BEGIN(test_stats_arenas_bins) { int expected = config_stats ? 0 : ENOENT; /* Make sure allocation below isn't satisfied by tcache. */ - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 
0 : EFAULT, "Unexpected mallctl() result"); unsigned arena_ind, old_arena_ind; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); sz = sizeof(arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); p = malloc(bin_infos[0].reg_size); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); char cmd[128]; sz = sizeof(uint64_t); gen_mallctl_str(cmd, "nmalloc", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "ndalloc", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nrequests", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); gen_mallctl_str(cmd, "curregs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); gen_mallctl_str(cmd, "nfills", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nflushes", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nreslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); gen_mallctl_str(cmd, "curslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nonfull_slabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0), + expect_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); 
- assert_u64_gt(nrequests, 0, + expect_u64_gt(nrequests, 0, "nrequests should be greater than zero"); - assert_zu_gt(curregs, 0, + expect_zu_gt(curregs, 0, "allocated should be greater than zero"); if (opt_tcache) { - assert_u64_gt(nfills, 0, + expect_u64_gt(nfills, 0, "At least one fill should have occurred"); - assert_u64_gt(nflushes, 0, + expect_u64_gt(nflushes, 0, "At least one flush should have occurred"); } - assert_u64_gt(nslabs, 0, + expect_u64_gt(nslabs, 0, "At least one slab should have been allocated"); - assert_zu_gt(curslabs, 0, + expect_zu_gt(curslabs, 0, "At least one slab should be currently allocated"); - assert_zu_eq(nonfull_slabs, 0, + expect_zu_eq(nonfull_slabs, 0, "slabs_nonfull should be empty"); } @@ -327,33 +327,33 @@ TEST_BEGIN(test_stats_arenas_lextents) { int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); p = mallocx(hsize, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", (void *)&curlextents, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(curlextents, 0, + expect_u64_gt(curlextents, 0, "At least one extent should be currently allocated"); } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 014d002..3b31775 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -136,7 +136,7 @@ parser_tokenize(parser_t *parser) { size_t token_line JEMALLOC_CC_SILENCE_INIT(1); size_t token_col JEMALLOC_CC_SILENCE_INIT(0); - assert_zu_le(parser->pos, parser->len, + expect_zu_le(parser->pos, parser->len, "Position is past end of buffer"); while (state != STATE_ACCEPT) { @@ -686,7 +686,7 @@ parser_parse_value(parser_t *parser) { static bool parser_parse_pair(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Pair should start with string"); if (parser_tokenize(parser)) { return true; @@ -731,7 +731,7 @@ parser_parse_values(parser_t *parser) { static bool parser_parse_array(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, "Array should start with ["); if (parser_tokenize(parser)) { return true; @@ -747,7 +747,7 @@ parser_parse_array(parser_t *parser) { static bool parser_parse_pairs(parser_t *parser) { - 
assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Object should start with string"); if (parser_parse_pair(parser)) { return true; @@ -782,7 +782,7 @@ parser_parse_pairs(parser_t *parser) { static bool parser_parse_object(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, "Object should start with {"); if (parser_tokenize(parser)) { return true; @@ -899,9 +899,9 @@ TEST_BEGIN(test_json_parser) { const char *input = invalid_inputs[i]; parser_t parser; parser_init(&parser, false); - assert_false(parser_append(&parser, input), + expect_false(parser_append(&parser, input), "Unexpected input appending failure"); - assert_true(parser_parse(&parser), + expect_true(parser_parse(&parser), "Unexpected parse success for input: %s", input); parser_fini(&parser); } @@ -910,9 +910,9 @@ TEST_BEGIN(test_json_parser) { const char *input = valid_inputs[i]; parser_t parser; parser_init(&parser, true); - assert_false(parser_append(&parser, input), + expect_false(parser_append(&parser, input), "Unexpected input appending failure"); - assert_false(parser_parse(&parser), + expect_false(parser_parse(&parser), "Unexpected parse error for input: %s", input); parser_fini(&parser); } @@ -961,17 +961,17 @@ TEST_BEGIN(test_stats_print_json) { break; case 1: { size_t sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl failure"); break; } case 2: { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); mib[1] = arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib failure"); break; } default: @@ -983,7 +983,7 @@ TEST_BEGIN(test_stats_print_json) { parser_init(&parser, true); malloc_stats_print(write_cb, (void *)&parser, opts[j]); - assert_false(parser_parse(&parser), + expect_false(parser_parse(&parser), "Unexpected parse error, opts=\"%s\"", opts[j]); parser_fini(&parser); } diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index ded8698..2a5b3d5 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -17,16 +17,16 @@ func_to_hook(int arg1, int arg2) { TEST_BEGIN(unhooked_call) { test_hooks_libc_hook = NULL; hook_called = false; - assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); - assert_false(hook_called, "Nulling out hook didn't take."); + expect_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + expect_false(hook_called, "Nulling out hook didn't take."); } TEST_END TEST_BEGIN(hooked_call) { test_hooks_libc_hook = &hook; hook_called = false; - assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); - assert_true(hook_called, "Hook should have executed."); + expect_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + expect_true(hook_called, "Hook should have executed."); } TEST_END diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 0855829..ef3b95c 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -15,7 +15,7 @@ TEST_BEGIN(test_next_event_fast_roll_back) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); - assert_ptr_not_null(p, "malloc() failed"); + 
expect_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -37,7 +37,7 @@ TEST_BEGIN(test_next_event_fast_resume) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); - assert_ptr_not_null(p, "malloc() failed"); + expect_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -50,7 +50,7 @@ TEST_BEGIN(test_event_rollback) { while (count-- != 0) { te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); - assert_u64_eq(thread_allocated - thread_allocated_after, diff, + expect_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); thread_allocated = thread_allocated_after; } diff --git a/test/unit/ticker.c b/test/unit/ticker.c index e5790a3..1cf10b0 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -11,16 +11,16 @@ TEST_BEGIN(test_ticker_tick) { ticker_init(&ticker, NTICKS); for (i = 0; i < NREPS; i++) { for (j = 0; j < NTICKS; j++) { - assert_u_eq(ticker_read(&ticker), NTICKS - j, + expect_u_eq(ticker_read(&ticker), NTICKS - j, "Unexpected ticker value (i=%d, j=%d)", i, j); - assert_false(ticker_tick(&ticker), + expect_false(ticker_tick(&ticker), "Unexpected ticker fire (i=%d, j=%d)", i, j); } - assert_u32_eq(ticker_read(&ticker), 0, + expect_u32_eq(ticker_read(&ticker), 0, "Expected ticker depletion"); - assert_true(ticker_tick(&ticker), + expect_true(ticker_tick(&ticker), "Expected ticker fire (i=%d)", i); - assert_u32_eq(ticker_read(&ticker), NTICKS, + expect_u32_eq(ticker_read(&ticker), NTICKS, "Expected ticker reset"); } #undef NTICKS @@ -33,14 +33,14 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); - assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + expect_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } TEST_END @@ -51,15 +51,15 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); ticker_tick(&ta); ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + 
expect_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); #undef NTICKS } TEST_END diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 917884d..3f3ca73 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -10,7 +10,7 @@ static int data_cleanup_count; void data_cleanup(int *data) { if (data_cleanup_count == 0) { - assert_x_eq(*data, MALLOC_TSD_TEST_DATA_INIT, + expect_x_eq(*data, MALLOC_TSD_TEST_DATA_INIT, "Argument passed into cleanup function should match tsd " "value"); } @@ -38,7 +38,7 @@ data_cleanup(int *data) { if (reincarnate) { void *p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpeced mallocx() failure"); + expect_ptr_not_null(p, "Unexpeced mallocx() failure"); dallocx(p, 0); } } @@ -49,18 +49,18 @@ thd_start(void *arg) { void *p; tsd_t *tsd = tsd_fetch(); - assert_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, + expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); tsd_test_data_set(tsd, d); - assert_x_eq(tsd_test_data_get(tsd), d, + expect_x_eq(tsd_test_data_get(tsd), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, + expect_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); tsd_test_callback_set(tsd, &data_cleanup); @@ -84,7 +84,7 @@ TEST_BEGIN(test_tsd_sub_thread) { * We reincarnate twice in the data cleanup, so it should execute at * least 3 times. */ - assert_x_ge(data_cleanup_count, 3, + expect_x_ge(data_cleanup_count, 3, "Cleanup function should have executed multiple times."); } TEST_END @@ -95,28 +95,28 @@ thd_start_reincarnated(void *arg) { assert(tsd); void *p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. */ - assert_ptr_not_null(tsd_arena_get(tsd), + expect_ptr_not_null(tsd_arena_get(tsd), "Should have tsd arena set."); tsd_cleanup((void *)tsd); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared."); - assert_u_eq(tsd_state_get(tsd), tsd_state_purgatory, + expect_u_eq(tsd_state_get(tsd), tsd_state_purgatory, "TSD state should be purgatory\n"); free(p); - assert_u_eq(tsd_state_get(tsd), tsd_state_reincarnated, + expect_u_eq(tsd_state_get(tsd), tsd_state_reincarnated, "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected malloc() failure"); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "Should not have tsd arena set after reincarnation."); free(p); tsd_cleanup((void *)tsd); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared after 2nd cleanup."); return NULL; @@ -206,46 +206,46 @@ TEST_BEGIN(test_tsd_global_slow) { * Spin-wait. 
*/ } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_inc(tsd_tsdn(tsd)); free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); atomic_store_u32(&data.phase, 2, ATOMIC_SEQ_CST); /* PHASE 3 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 3) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); /* Increase again, so that we can test multiple fast/slow changes. */ tsd_global_slow_inc(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 4, ATOMIC_SEQ_CST); free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); /* PHASE 5 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 5) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_dec(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 6, ATOMIC_SEQ_CST); /* We only decreased once; things should still be slow. */ free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); /* PHASE 7 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 7) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_dec(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 8, ATOMIC_SEQ_CST); /* We incremented and then decremented twice; we should be fast now. */ free(mallocx(1, 0)); - assert_true(!originally_fast || tsd_fast(tsd), ""); + expect_true(!originally_fast || tsd_fast(tsd), ""); /* PHASE 9 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 9) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); thd_join(thd, NULL); } diff --git a/test/unit/witness.c b/test/unit/witness.c index 5986da4..5a6c448 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -34,7 +34,7 @@ witness_depth_error_intercept(const witness_list_t *witnesses, static int witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); assert(ob == (void *)b); @@ -45,7 +45,7 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { static int witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); assert(ob == (void *)b); @@ -121,9 +121,9 @@ TEST_BEGIN(test_witness_comp) { witness_init(&c, "c", 1, witness_comp_reverse, &c); witness_assert_not_owner(&witness_tsdn, &c); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(&witness_tsdn, &c); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &c); witness_assert_depth(&witness_tsdn, 1); @@ -131,9 +131,9 @@ TEST_BEGIN(test_witness_comp) { witness_init(&d, "d", 1, NULL, NULL); witness_assert_not_owner(&witness_tsdn, &d); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock 
error"); witness_lock(&witness_tsdn, &d); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &d); witness_assert_depth(&witness_tsdn, 1); @@ -162,9 +162,9 @@ TEST_BEGIN(test_witness_reversal) { witness_lock(&witness_tsdn, &b); witness_assert_depth(&witness_tsdn, 1); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(&witness_tsdn, &a); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &a); witness_assert_depth(&witness_tsdn, 1); @@ -195,11 +195,11 @@ TEST_BEGIN(test_witness_recursive) { witness_init(&a, "a", 1, NULL, NULL); witness_lock(&witness_tsdn, &a); - assert_false(saw_lock_error, "Unexpected witness lock error"); - assert_false(saw_not_owner_error, "Unexpected witness not owner error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_not_owner_error, "Unexpected witness not owner error"); witness_lock(&witness_tsdn, &a); - assert_true(saw_lock_error, "Expected witness lock error"); - assert_true(saw_not_owner_error, "Expected witness not owner error"); + expect_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_not_owner_error, "Expected witness not owner error"); witness_unlock(&witness_tsdn, &a); @@ -225,9 +225,9 @@ TEST_BEGIN(test_witness_unlock_not_owned) { witness_init(&a, "a", 1, NULL, NULL); - assert_false(saw_owner_error, "Unexpected owner error"); + expect_false(saw_owner_error, "Unexpected owner error"); witness_unlock(&witness_tsdn, &a); - assert_true(saw_owner_error, "Expected owner error"); + expect_true(saw_owner_error, "Expected owner error"); witness_assert_lockless(&witness_tsdn); @@ -250,14 +250,14 @@ TEST_BEGIN(test_witness_depth) { witness_init(&a, "a", 1, NULL, NULL); - assert_false(saw_depth_error, "Unexpected depth error"); + expect_false(saw_depth_error, "Unexpected depth error"); witness_assert_lockless(&witness_tsdn); witness_assert_depth(&witness_tsdn, 0); witness_lock(&witness_tsdn, &a); witness_assert_lockless(&witness_tsdn); witness_assert_depth(&witness_tsdn, 0); - assert_true(saw_depth_error, "Expected depth error"); + expect_true(saw_depth_error, "Expected depth error"); witness_unlock(&witness_tsdn, &a); diff --git a/test/unit/zero.c b/test/unit/zero.c index 271fd5c..d3e81f1 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -8,21 +8,21 @@ test_zero(size_t sz_min, size_t sz_max) { sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], MAGIC, + expect_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], MAGIC, + expect_u_eq(s[sz_prev-1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_u_eq(s[i], 0x0, + expect_u_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); s[i] = MAGIC; @@ -30,7 +30,7 @@ test_zero(size_t sz_min, size_t sz_max) { if (xallocx(s, sz+1, 0, 0) == sz) { s = (uint8_t *)rallocx(s, sz+1, 0); - assert_ptr_not_null((void *)s, + expect_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } diff 
--git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index 2f49392..a880d10 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -12,9 +12,9 @@ TEST_BEGIN(test_realloc_abort) { abort_called = false; safety_check_set_abort(&set_abort_called); void *ptr = mallocx(42, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); ptr = realloc(ptr, 0); - assert_true(abort_called, "Realloc with zero size didn't abort"); + expect_true(abort_called, "Realloc with zero size didn't abort"); } TEST_END diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index a073688..baed86c 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -7,20 +7,20 @@ deallocated() { } uint64_t deallocated; size_t sz = sizeof(deallocated); - assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return deallocated; } TEST_BEGIN(test_realloc_free) { void *ptr = mallocx(42, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); uint64_t deallocated_before = deallocated(); ptr = realloc(ptr, 0); uint64_t deallocated_after = deallocated(); - assert_ptr_null(ptr, "Realloc didn't free"); + expect_ptr_null(ptr, "Realloc didn't free"); if (config_stats) { - assert_u64_gt(deallocated_after, deallocated_before, + expect_u64_gt(deallocated_after, deallocated_before, "Realloc didn't free"); } } diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_strict.c index b709951..249d838 100644 --- a/test/unit/zero_realloc_strict.c +++ b/test/unit/zero_realloc_strict.c @@ -7,7 +7,7 @@ allocated() { } uint64_t allocated; size_t sz = sizeof(allocated); - assert_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, + expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return allocated; } @@ -19,23 +19,23 @@ deallocated() { } uint64_t deallocated; size_t sz = sizeof(deallocated); - assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return deallocated; } TEST_BEGIN(test_realloc_strict) { void *ptr = mallocx(1, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); uint64_t allocated_before = allocated(); uint64_t deallocated_before = deallocated(); ptr = realloc(ptr, 0); uint64_t allocated_after = allocated(); uint64_t deallocated_after = deallocated(); if (config_stats) { - assert_u64_lt(allocated_before, allocated_after, + expect_u64_lt(allocated_before, allocated_after, "Unexpected stats change"); - assert_u64_lt(deallocated_before, deallocated_after, + expect_u64_lt(deallocated_before, deallocated_after, "Unexpected stats change"); } dallocx(ptr, 0); diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index fd33aaf..66c7a40 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -8,7 +8,7 @@ zero_reallocs() { size_t count = 12345; size_t sz = sizeof(count); - assert_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, + expect_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return count; } @@ -18,13 +18,13 @@ TEST_BEGIN(test_zero_reallocs) { for 
(size_t i = 0; i < 100; ++i) { void *ptr = mallocx(i * i + 1, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); size_t count = zero_reallocs(); - assert_zu_eq(i, count, "Incorrect zero realloc count"); + expect_zu_eq(i, count, "Incorrect zero realloc count"); ptr = realloc(ptr, 0); - assert_ptr_null(ptr, "Realloc didn't free"); + expect_ptr_null(ptr, "Realloc didn't free"); count = zero_reallocs(); - assert_zu_eq(i + 1, count, "Realloc didn't adjust count"); + expect_zu_eq(i + 1, count, "Realloc didn't adjust count"); } } TEST_END -- cgit v0.12 From fa615793821219f8ad62e40aa23c848e5136aa5c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:10:10 -0800 Subject: Add assert_* functionality to tests --- test/include/test/test.h | 237 ++++++++++++++++++++++++++++++++++++++++++++++- test/src/test.c | 5 + 2 files changed, 240 insertions(+), 2 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index cf6616b..a1b8ff3 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -245,7 +245,7 @@ #define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define expect_str_eq(a, b, ...) do { \ +#define expect_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -258,7 +258,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define expect_str_ne(a, b, ...) do { \ +#define expect_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -282,6 +282,238 @@ p_test_fail(prefix, message); \ } while (0) +#define p_abort_test_if_failed() do { \ + if (p_test_failed()) { \ + abort(); \ + } \ +} while (0) + +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ + expect_cmp(t, a, b, cmp, neg_cmp, pri, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ + ==, "p", __VA_ARGS__) +#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ + ==, "p", __VA_ARGS__) + +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) + +#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) + +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) 
assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) + +#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) + +#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ + !=, "ld", __VA_ARGS__) +#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ + ==, "ld", __VA_ARGS__) +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ + >=, "ld", __VA_ARGS__) +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ + >, "ld", __VA_ARGS__) +#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ + <, "ld", __VA_ARGS__) +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ + <=, "ld", __VA_ARGS__) + +#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ + a, b, ==, !=, "lu", __VA_ARGS__) +#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ + a, b, !=, ==, "lu", __VA_ARGS__) +#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ + a, b, <, >=, "lu", __VA_ARGS__) +#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ + a, b, <=, >, "lu", __VA_ARGS__) +#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ + a, b, >=, <, "lu", __VA_ARGS__) +#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ + a, b, >, <=, "lu", __VA_ARGS__) + +#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ + !=, "qd", __VA_ARGS__) +#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ + ==, "qd", __VA_ARGS__) +#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ + >=, "qd", __VA_ARGS__) +#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ + >, "qd", __VA_ARGS__) +#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ + <, "qd", __VA_ARGS__) +#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ + <=, "qd", __VA_ARGS__) + +#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ + a, b, ==, !=, "qu", __VA_ARGS__) +#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ + a, b, !=, ==, "qu", __VA_ARGS__) +#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <, >=, "qu", __VA_ARGS__) +#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <=, >, "qu", __VA_ARGS__) +#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >=, <, "qu", __VA_ARGS__) +#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >, <=, "qu", __VA_ARGS__) + +#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ + !=, "jd", __VA_ARGS__) +#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ + ==, "jd", __VA_ARGS__) +#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ + >=, "jd", __VA_ARGS__) +#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ + >, "jd", __VA_ARGS__) +#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ + <, "jd", __VA_ARGS__) +#define assert_jd_gt(a, b, ...) 
assert_cmp(intmax_t, a, b, >, \ + <=, "jd", __VA_ARGS__) + +#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ + !=, "ju", __VA_ARGS__) +#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ + ==, "ju", __VA_ARGS__) +#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ + >=, "ju", __VA_ARGS__) +#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ + >, "ju", __VA_ARGS__) +#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ + <, "ju", __VA_ARGS__) +#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ + <=, "ju", __VA_ARGS__) + +#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ + !=, "zd", __VA_ARGS__) +#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ + ==, "zd", __VA_ARGS__) +#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ + >=, "zd", __VA_ARGS__) +#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ + >, "zd", __VA_ARGS__) +#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ + <, "zd", __VA_ARGS__) +#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ + <=, "zd", __VA_ARGS__) + +#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ + !=, "zu", __VA_ARGS__) +#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ + ==, "zu", __VA_ARGS__) +#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ + >=, "zu", __VA_ARGS__) +#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ + >, "zu", __VA_ARGS__) +#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ + <, "zu", __VA_ARGS__) +#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ + <=, "zu", __VA_ARGS__) + +#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ + !=, FMTd32, __VA_ARGS__) +#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ + ==, FMTd32, __VA_ARGS__) +#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ + >=, FMTd32, __VA_ARGS__) +#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ + >, FMTd32, __VA_ARGS__) +#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ + <, FMTd32, __VA_ARGS__) +#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ + <=, FMTd32, __VA_ARGS__) + +#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ + !=, FMTu32, __VA_ARGS__) +#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ + ==, FMTu32, __VA_ARGS__) +#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ + >=, FMTu32, __VA_ARGS__) +#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ + >, FMTu32, __VA_ARGS__) +#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ + <, FMTu32, __VA_ARGS__) +#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ + <=, FMTu32, __VA_ARGS__) + +#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ + !=, FMTd64, __VA_ARGS__) +#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ + ==, FMTd64, __VA_ARGS__) +#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ + >=, FMTd64, __VA_ARGS__) +#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ + >, FMTd64, __VA_ARGS__) +#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ + <, FMTd64, __VA_ARGS__) +#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ + <=, FMTd64, __VA_ARGS__) + +#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ + !=, FMTu64, __VA_ARGS__) +#define assert_u64_ne(a, b, ...) 
assert_cmp(uint64_t, a, b, !=, \ + ==, FMTu64, __VA_ARGS__) +#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ + >=, FMTu64, __VA_ARGS__) +#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ + >, FMTu64, __VA_ARGS__) +#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ + <, FMTu64, __VA_ARGS__) +#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ + <=, FMTu64, __VA_ARGS__) + +#define assert_b_eq(a, b, ...) do { \ + expect_b_eq(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_b_ne(a, b, ...) do { \ + expect_b_ne(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) + +#define assert_str_eq(a, b, ...) do { \ + expect_str_eq(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_str_ne(a, b, ...) do { \ + expect_str_ne(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_not_reached(...) do { \ + expect_not_reached(__VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + /* * If this enum changes, corresponding changes in test/test.sh.in are also * necessary. @@ -336,5 +568,6 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); +bool p_test_failed(void); void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index 4583e55..b40fbc6 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -233,6 +233,11 @@ p_test_fail(const char *prefix, const char *message) { test_status = test_status_fail; } +bool +p_test_failed() { + return test_status == test_status_fail; +} + void strncpy_cond(void *dst, const char *src, bool cond) { if (cond) { -- cgit v0.12 From 0ceb31184d145646ff30b03f566069307cd570d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:39:42 -0800 Subject: Make use of assert_* in test/unit/buf_writer.c --- test/unit/buf_writer.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 37314db..01f2411 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -10,19 +10,21 @@ static char test_buf[TEST_BUF_SIZE]; static uint64_t arg; static uint64_t arg_store; -static void test_write_cb(void *cbopaque, const char *s) { +static void +test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - expect_zu_le(prev_test_write_len, test_write_len, + assert_zu_le(prev_test_write_len, test_write_len, "Test write overflowed"); } -static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { +static void +test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - expect_ptr_not_null(buf_writer->buf, "Buffer is null"); + assert(buf_writer->buf != NULL); write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); void *cbopaque = buf_writer_get_cbopaque(buf_writer); @@ -41,7 +43,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. 
*/ - expect_u64_eq(arg_store, arg, "Call " + assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -49,7 +51,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += buf_writer->buf_size; } } - expect_zu_eq(test_write_len + remain, i * unit, + assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } @@ -65,7 +67,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); @@ -75,7 +77,7 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } @@ -84,13 +86,13 @@ TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); - expect_ptr_null(buf_writer.buf, "Buffer should be null"); + assert(buf_writer.buf == NULL); write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); - expect_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - expect_ptr_eq(cbopaque, &arg, "Should use arg"); + assert_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -107,9 +109,9 @@ TEST_BEGIN(test_buf_write_oom) { for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); write_cb(cbopaque, s); - expect_u64_eq(arg_store, arg, + assert_u64_eq(arg_store, arg, "Call back argument didn't get through"); - expect_zu_eq(test_write_len, i * unit, + assert_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } -- cgit v0.12 From a88d22ea114b4db398aad021aa1dcd1b33b4038d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:27:09 -0800 Subject: Make use of assert_* in test/unit/inspect.c --- test/unit/inspect.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 41ef6c2..384b1ad 100644 --- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" #define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - expect_d_eq(mallctl("experimental.utilization." node, \ + assert_d_eq(mallctl("experimental.utilization." 
node, \ a, b, c, d), EINVAL, "Should fail when " why_inval); \ - expect_zu_eq(out_sz, out_sz_ref, \ + assert_zu_eq(out_sz, out_sz_ref, \ "Output size touched when given invalid arguments"); \ - expect_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content touched when given invalid arguments"); \ } while (0) @@ -15,7 +15,7 @@ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) #define TEST_UTIL_VALID(node) do { \ - expect_d_eq(mallctl("experimental.utilization." node, \ + assert_d_eq(mallctl("experimental.utilization." node, \ out, &out_sz, in, in_sz), 0, \ "Should return 0 on correct arguments"); \ expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ @@ -43,11 +43,11 @@ TEST_BEGIN(test_query) { void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - expect_ptr_not_null(p, + assert_ptr_not_null(p, "test pointer allocation failed"); - expect_ptr_not_null(out, + assert_ptr_not_null(out, "test output allocation failed"); - expect_ptr_not_null(out_ref, + assert_ptr_not_null(out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) @@ -174,8 +174,8 @@ TEST_BEGIN(test_batch) { size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz_ref = out_sz; - expect_ptr_not_null(p, "test pointer allocation failed"); - expect_ptr_not_null(q, "test pointer allocation failed"); + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, @@ -201,7 +201,7 @@ TEST_BEGIN(test_batch) { /* Examine output for valid calls */ #define TEST_EQUAL_REF(i, message) \ - expect_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] #define NREGS_READ(out, i) out[(i) * 3 + 1] @@ -261,7 +261,7 @@ TEST_END int main(void) { - expect_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + assert_zu_lt(SC_SMALL_MAXCLASS + 100000, TEST_MAX_SIZE, "Test case cannot cover large classes"); return test(test_query, test_batch); } -- cgit v0.12 From 9d2cc3b0fa8365d69747bf0d04686fe41fe44d3e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:55:19 -0800 Subject: Make use of assert_* in test/unit/prof_recent.c --- test/unit/prof_recent.c | 147 ++++++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 67 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 7400d6c..35a2333 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -6,16 +6,17 @@ #define OPT_ALLOC_MAX 3 /* Invariant before and after every test (when config_prof is on) */ -static void confirm_prof_setup(tsd_t *tsd) { +static void +confirm_prof_setup(tsd_t *tsd) { /* Options */ - expect_true(opt_prof, "opt_prof not on"); - expect_true(opt_prof_active, "opt_prof_active not on"); - expect_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + assert_true(opt_prof, "opt_prof not on"); + assert_true(opt_prof_active, "opt_prof_active not on"); + assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - expect_true(prof_active, "prof_active not on"); - expect_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + assert_true(prof_active, "prof_active not on"); + assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } @@ -35,11 
+36,11 @@ TEST_BEGIN(test_prof_recent_off) { size_t len = len_ref; #define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - expect_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ + assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ d), ENOENT, "Should return ENOENT when config_prof is off");\ - expect_zd_eq(past, past_ref, "output was touched"); \ - expect_zu_eq(len, len_ref, "output length was touched"); \ - expect_zd_eq(future, future_ref, "input was touched"); \ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); @@ -61,32 +62,32 @@ TEST_BEGIN(test_prof_recent_on) { confirm_prof_setup(tsd); - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); confirm_prof_setup(tsd); - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, len), 0, "Write error"); future = -1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX + 2; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len * 2), EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, -1, @@ -99,13 +100,14 @@ TEST_END /* Reproducible sequence of request sizes */ #define NTH_REQ_SIZE(n) ((n) * 97 + 101) -static void confirm_malloc(tsd_t *tsd, void *p) { - expect_ptr_not_null(p, "malloc failed unexpectedly"); +static void +confirm_malloc(tsd_t *tsd, void *p) { + assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); - expect_ptr_not_null(e, "NULL edata for living pointer"); + assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); - expect_ptr_not_null(n, "Record in edata should not be NULL"); + assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_eq(e, n->alloc_edata, @@ -114,24 +116,27 @@ static void confirm_malloc(tsd_t *tsd, void *p) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } -static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { +static void 
+confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } -static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { +static void +confirm_record_living(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_not_null(n->alloc_edata, + assert_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } -static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { +static void +confirm_record_released(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); @@ -167,7 +172,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_ptr_ne(prof_recent_alloc_begin(tsd), + assert_ptr_ne(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), @@ -194,7 +199,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -202,7 +207,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_prof_setup(tsd); b = false; - expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning off prof_active failed"); /* @@ -212,7 +217,7 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 3 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - expect_ptr_not_null(p, "malloc failed unexpectedly"); + assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); for (n = prof_recent_alloc_begin(tsd); @@ -223,13 +228,13 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } b = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning on prof_active failed"); confirm_prof_setup(tsd); @@ -267,14 +272,14 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of records. 
*/ future = OPT_ALLOC_MAX + 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -286,7 +291,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -294,7 +299,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * the new limit is still no less than the length of the list. */ future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -306,7 +311,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -314,7 +319,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * limit is less than the length of the list. */ future = OPT_ALLOC_MAX - 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -326,12 +331,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX - 1, + assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -343,36 +348,39 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX - 1, + assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. */ future = 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); n = prof_recent_alloc_begin(tsd); - assert(n != prof_recent_alloc_end(tsd)); + assert_ptr_ne(n, prof_recent_alloc_end(tsd), "Recent list is empty"); confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); confirm_record_released(tsd, n); n = prof_recent_alloc_next(tsd, n); - assert(n == prof_recent_alloc_end(tsd)); + assert_ptr_eq(n, prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. 
*/ future = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Restore the settings. */ future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); confirm_prof_setup(tsd); @@ -385,17 +393,19 @@ TEST_END static char dump_out[DUMP_OUT_SIZE]; static size_t dump_out_len = 0; -static void test_dump_write_cb(void *not_used, const char *str) { +static void +test_dump_write_cb(void *not_used, const char *str) { size_t len = strlen(str); assert(dump_out_len + len < DUMP_OUT_SIZE); memcpy(dump_out + dump_out_len, str, len + 1); dump_out_len += len; } -static void call_dump() { +static void +call_dump() { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_dump", + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); } @@ -406,7 +416,8 @@ typedef struct { #define DUMP_ERROR "Dump output is wrong" -static void confirm_record(const char *template, +static void +confirm_record(const char *template, const confirm_record_t *records, const size_t n_records) { static const char *types[2] = {"alloc", "dalloc"}; static char buf[64]; @@ -418,9 +429,9 @@ static void confirm_record(const char *template, * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". 
*/ - expect_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); - expect_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, template + strlen(template) - 2, 2), 0, DUMP_ERROR); const char *start = dump_out + strlen(template) - 2; @@ -429,14 +440,14 @@ static void confirm_record(const char *template, for (record = records; record < records + n_records; ++record) { #define ASSERT_CHAR(c) do { \ - expect_true(start < end, DUMP_ERROR); \ - expect_c_eq(*start++, c, DUMP_ERROR); \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ } while (0) #define ASSERT_STR(s) do { \ const size_t len = strlen(s); \ - expect_true(start + len <= end, DUMP_ERROR); \ - expect_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ start += len; \ } while (0) @@ -512,8 +523,8 @@ static void confirm_record(const char *template, #undef ASSERT_CHAR } - expect_ptr_eq(record, records + n_records, DUMP_ERROR); - expect_ptr_eq(start, end, DUMP_ERROR); + assert_ptr_eq(record, records + n_records, DUMP_ERROR); + assert_ptr_eq(start, end, DUMP_ERROR); } TEST_BEGIN(test_prof_recent_alloc_dump) { @@ -527,14 +538,14 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record_t records[2]; future = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; @@ -563,7 +574,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } @@ -588,11 +599,13 @@ typedef struct { static thd_data_t thd_data[N_THREADS]; static ssize_t test_max; -static void test_write_cb(void *cbopaque, const char *str) { +static void +test_write_cb(void *cbopaque, const char *str) { sleep_ns(1000 * 1000); } -static void *f_thread(void *arg) { +static void * +f_thread(void *arg) { const size_t thd_id = *(size_t *)arg; thd_data_t *data_p = thd_data + thd_id; assert(data_p->id == thd_id); @@ -632,7 +645,7 @@ static void *f_thread(void *arg) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max / 2); } - expect_zd_ge(last_max, -1, "Illegal last-N max"); + assert_zd_ge(last_max, -1, "Illegal last-N max"); } while (data_p->count > 0) { @@ -660,7 +673,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; @@ -673,7 +686,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + 
assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } -- cgit v0.12 From 51bd147422d95bfcd3919f11a6a7dd7a574e05cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:57:47 -0800 Subject: Make use of assert_* in test/unit/thread_event.c --- test/unit/thread_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index ef3b95c..0855829 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -15,7 +15,7 @@ TEST_BEGIN(test_next_event_fast_roll_back) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); - expect_ptr_not_null(p, "malloc() failed"); + assert_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -37,7 +37,7 @@ TEST_BEGIN(test_next_event_fast_resume) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); - expect_ptr_not_null(p, "malloc() failed"); + assert_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -50,7 +50,7 @@ TEST_BEGIN(test_event_rollback) { while (count-- != 0) { te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); - expect_u64_eq(thread_allocated - thread_allocated_after, diff, + assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); thread_allocated = thread_allocated_after; } -- cgit v0.12 From bc31041edb183d739574d622888d818dbc1bfadf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Feb 2020 11:05:57 -0800 Subject: Cirrus-CI: test on new freebsd releases. --- .cirrus.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index a9de953..d01954f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -6,8 +6,6 @@ task: freebsd_instance: matrix: image: freebsd-12-1-release-amd64 - image: freebsd-12-0-release-amd64 - image: freebsd-11-2-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y -- cgit v0.12 From 9f4fc273892f130fd81d26e7cb9e561fb5a10679 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Tue, 25 Feb 2020 07:47:04 -0800 Subject: Ehooks: Fix a build warning. We wrote `return some_void_func()` in a function returning void, which is confusing and triggers warnings on MSVC. --- include/jemalloc/internal/ehooks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 1bd44cb..bae468b 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -222,9 +222,9 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_destroy_impl(addr, size); + ehooks_default_destroy_impl(addr, size); } else if (extent_hooks->destroy == NULL) { - return; + /* Do nothing. */ } else { ehooks_pre_reentrancy(tsdn); extent_hooks->destroy(extent_hooks, addr, size, committed, -- cgit v0.12 From 6c3491ad3105994f8b804fc6ddb1aa88024a4d4b Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 23 Feb 2020 20:33:04 -0800 Subject: Tcache: Unify bin flush logic. The small and large pathways share most of their logic, even if some of the individual operations are different. 
We pull out the common logic into a force-inlined function, and then specialize twice, once for each value of "small". --- .../jemalloc/internal/jemalloc_internal_decls.h | 9 + src/tcache.c | 306 +++++++++++---------- 2 files changed, 170 insertions(+), 145 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 042a1fa..32058ce 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -92,4 +92,13 @@ isblank(int c) { #endif #include +/* + * The Win32 midl compiler has #define small char; we don't use midl, but + * "small" is a nice identifier to have available when talking about size + * classes. + */ +#ifdef small +# undef small +#endif + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/src/tcache.c b/src/tcache.c index 782d883..7ffa6fc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -139,21 +139,44 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, } } -void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem) { - assert(binind < SC_NBINS); +JEMALLOC_ALWAYS_INLINE bool +tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, + unsigned cur_binshard, bool small) { + if (small) { + return edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard; + } else { + return edata_arena_ind_get(edata) == cur_arena_ind; + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem, bool small) { + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + + if (small) { + assert(binind < SC_NBINS); + } else { + assert(binind < nhbins); + } cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); assert((cache_bin_sz_t)rem <= ncached); + arena_t *tcache_arena = tcache->arena; + assert(tcache_arena != NULL); - arena_t *arena = tcache->arena; - assert(arena != NULL); unsigned nflush = ncached - rem; - /* Variable length array must have > 0 length. */ + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). + */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - tsdn_t *tsdn = tsd_tsdn(tsd); + /* Look up edata once per item. */ if (config_opt_safety_checks) { tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, @@ -165,173 +188,154 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } } - bool merged_stats = false; + /* + * The slabs where we freed the last remaining object in the slab (and + * so need to free the slab itself). + * Used only if small == true. + */ unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + bool merged_stats = false; while (nflush > 0) { - /* Lock the arena bin associated with the first object. */ + /* Lock the arena, or bin, associated with the first object. 
*/ edata_t *edata = item_edata[0]; - unsigned bin_arena_ind = edata_arena_ind_get(edata); - arena_t *bin_arena = arena_get(tsdn, bin_arena_ind, false); - unsigned binshard = edata_binshard_get(edata); - assert(binshard < bin_infos[binind].n_shards); - bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats && bin_arena == arena && !merged_stats) { - merged_stats = true; - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } - unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); - edata = item_edata[i]; - assert(ptr != NULL && edata != NULL); + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - if (edata_arena_ind_get(edata) == bin_arena_ind - && edata_binshard_get(edata) == binshard) { - if (arena_dalloc_bin_junked_locked(tsdn, - bin_arena, bin, binind, edata, ptr)) { - dalloc_slabs[dalloc_count++] = edata; - } - } else { - /* - * This object was allocated via a different - * arena bin than the one that is currently - * locked. Stash the object, so that it can be - * handled in a future pass. - */ - *(bottom_item - ndeferred) = ptr; - item_edata[ndeferred] = edata; - ndeferred++; - } - } - malloc_mutex_unlock(tsdn, &bin->lock); - arena_decay_ticks(tsdn, bin_arena, nflush - ndeferred); - nflush = ndeferred; - } - /* Handle all deferred slab dalloc. */ - for (unsigned i = 0; i < dalloc_count; i++) { - edata_t *slab = dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - - if (config_stats && !merged_stats) { /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. + * These assignments are always overwritten when small is true, + * and their values are always ignored when small is false, but + * to avoid the technical UB when we pass them as parameters, we + * need to intialize them. */ - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, - &binshard); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); - } - - memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * - sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem); - if (tbin->cur_ptr.lowbits > tbin->low_water_position) { - tbin->low_water_position = tbin->cur_ptr.lowbits; - } -} - -void -tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, - unsigned rem) { - bool merged_stats = false; - - assert(binind < nhbins); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); - assert((cache_bin_sz_t)rem <= ncached); - - arena_t *tcache_arena = tcache->arena; - assert(tcache_arena != NULL); - unsigned nflush = ncached - rem; - /* Variable length array must have > 0 length. */ - VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); -#ifndef JEMALLOC_EXTRA_SIZE_CHECK - /* Look up edata once per item. */ - for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i)); - } -#else - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_edata); -#endif - while (nflush > 0) { - /* Lock the arena associated with the first object. 
*/ - edata_t *edata = item_edata[0]; - unsigned locked_arena_ind = edata_arena_ind_get(edata); - arena_t *locked_arena = arena_get(tsd_tsdn(tsd), - locked_arena_ind, false); + unsigned cur_binshard = 0; + bin_t *cur_bin = NULL; + if (small) { + cur_binshard = edata_binshard_get(edata); + cur_bin = &cur_arena->bins[binind].bin_shards[ + cur_binshard]; + assert(cur_binshard < bin_infos[binind].n_shards); + } - bool lock_large = !arena_is_auto(locked_arena); - if (lock_large) { - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + if (small) { + malloc_mutex_lock(tsdn, &cur_bin->lock); } - for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); - assert(ptr != NULL); - edata = item_edata[i]; - if (edata_arena_ind_get(edata) == locked_arena_ind) { - large_dalloc_prep_junked_locked(tsd_tsdn(tsd), - edata); - } + if (!small && !arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); } - if ((config_prof || config_stats) && - (locked_arena == tcache_arena)) { - if (config_stats) { - merged_stats = true; - arena_stats_large_flush_nrequests_add( - tsd_tsdn(tsd), &tcache_arena->stats, binind, + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + if (small) { + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } } - if (lock_large) { - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + if (!small) { + for (unsigned i = 0; i < nflush; i++) { + void *ptr = *(bottom_item - i); + edata = item_edata[i]; + assert(ptr != NULL && edata != NULL); + + if (tcache_bin_flush_match(edata, cur_arena_ind, + cur_binshard, small)) { + large_dalloc_prep_junked_locked(tsdn, + edata); + } + } + } + if (!small && !arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); } + /* Deallocate whatever we can. */ unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == locked_arena_ind) { - large_dalloc_finish(tsd_tsdn(tsd), edata); - } else { + if (!tcache_bin_flush_match(edata, cur_arena_ind, + cur_binshard, small)) { /* - * This object was allocated via a different - * arena than the one that is currently locked. - * Stash the object, so that it can be handled - * in a future pass. + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. */ *(bottom_item - ndeferred) = ptr; item_edata[ndeferred] = edata; ndeferred++; + continue; + } + if (small) { + if (arena_dalloc_bin_junked_locked(tsdn, + cur_arena, cur_bin, binind, edata, ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; + } + } else { + large_dalloc_finish(tsdn, edata); } } - arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - - ndeferred); + + if (small) { + malloc_mutex_unlock(tsdn, &cur_bin->lock); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); nflush = ndeferred; } + + /* Handle all deferred slab dalloc. 
*/ + assert(small || dalloc_count == 0); + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + + } + if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), - &tcache_arena->stats, binind, tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + if (small) { + /* + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. + */ + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, + binind, &binshard); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, + tbin->tstats.nrequests); + tbin->tstats.nrequests = 0; + } } memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * @@ -343,6 +347,18 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t } void +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true); +} + +void +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false); +} + +void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { assert(tcache->arena == NULL); tcache->arena = arena; -- cgit v0.12 From 305b1f6d962c5b5a76b7ddb4b55b14d88bada9ba Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 4 Mar 2020 10:27:30 -0800 Subject: Correction on geometric sampling --- src/prof.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index 761cb95..82f88a2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -468,9 +468,16 @@ prof_sample_threshold_update(tsd_t *tsd) { * Springer-Verlag, New York, 1986 * pp 500 * (http://luc.devroye.org/rnbookindex.html) + * + * In the actual computation, there's a non-zero probability that our + * pseudo random number generator generates an exact 0, and to avoid + * log(0), we set u to 1.0 in case r is 0. Therefore u effectively is + * uniformly distributed in (0, 1] instead of [0, 1). Further, rather + * than taking the ceiling, we take the floor and then add 1, since + * otherwise bytes_until_sample would be 0 if u is exactly 1.0. */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (double)r * (1.0/9007199254740992.0L); + double u = (r == 0U) ? 
1.0 : (double)r * (1.0/9007199254740992.0L); uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; -- cgit v0.12 From 4a78c6d81b3f431070f362c29ab7b492ee0b9e70 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 6 Mar 2020 15:31:40 -0800 Subject: Correct thread event unit test --- test/unit/thread_event.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 0855829..5501fa3 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_next_event_fast_roll_back) { +TEST_BEGIN(test_next_event_fast) { tsd_t *tsd = tsd_fetch(); te_ctx_t ctx; te_ctx_get(tsd, &ctx, true); @@ -14,31 +14,16 @@ TEST_BEGIN(test_next_event_fast_roll_back) { } ITERATE_OVER_ALL_EVENTS #undef E + + /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); assert_ptr_not_null(p, "malloc() failed"); free(p); -} -TEST_END - -TEST_BEGIN(test_next_event_fast_resume) { - tsd_t *tsd = tsd_fetch(); - - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, true); - te_ctx_last_event_set(&ctx, 0); - te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U); - te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U); -#define E(event, condition, is_alloc) \ - if (is_alloc && condition) { \ - event##_event_wait_set(tsd, \ - TE_NEXT_EVENT_FAST_MAX + 16U); \ - } - ITERATE_OVER_ALL_EVENTS -#undef E - void *p = malloc(SC_LOOKUP_MAXCLASS); - assert_ptr_not_null(p, "malloc() failed"); - free(p); + /* Test next_event_fast resuming to be equal to next_event. */ + void *q = malloc(SC_LOOKUP_MAXCLASS); + assert_ptr_not_null(q, "malloc() failed"); + free(q); } TEST_END @@ -60,7 +45,6 @@ TEST_END int main(void) { return test( - test_next_event_fast_roll_back, - test_next_event_fast_resume, + test_next_event_fast, test_event_rollback); } -- cgit v0.12 From 22657a5e65953c25531caf155d52ed43eb0c653f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 1 Mar 2020 09:36:09 -0800 Subject: Extents: Silence the "potentially unused" warning. --- src/extent.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index d06b8d6..87dcec3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -534,8 +534,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, @@ -711,8 +711,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *lead; edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, -- cgit v0.12 From b428dceeaf87fb35a16c2337ac13105f7d18dfd3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 16:53:00 -0800 Subject: Config: Warn on void * pointer arithmetic. This is handy while developing, but not portable. 
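For reference, the classic construct this warning catches is arithmetic directly on a void * pointer, a GNU extension that treats sizeof(void) as 1; the portable spelling goes through a byte or integer type instead. A small stand-alone illustration (hypothetical helper, not code from this patch):

    #include <stddef.h>
    #include <stdint.h>

    void *
    advance_bytes(void *p, size_t n) {
        /* return p + n;  -- GNU extension; -Wpointer-arith warns here. */
        return (void *)((uintptr_t)p + n);  /* portable */
        /* Equivalent portable form: return (char *)p + n; */
    }
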
--- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 6ccd009..324656b 100644 --- a/configure.ac +++ b/configure.ac @@ -250,6 +250,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) JE_CFLAGS_ADD([-Wno-format-zero-length]) + JE_CFLAGS_ADD([-Wpointer-arith]) dnl This warning triggers on the use of the universal zero initializer, which dnl is a very handy idiom for things like the tcache static initializer (which dnl has lots of nested structs). See the discussion at. -- cgit v0.12 From 79f1ee2fc0163d3666f38cfc59f8c1a8ab07f056 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 11:37:39 -0800 Subject: Move junking out of arena/tcache code. This is debug only and we keep it off the fast path. Moving it here simplifies the internal logic. This never tries to junk on regions that were shrunk via xallocx. I think this is fine for two reasons: - The shrunk-with-xallocx case is rare. - We don't always do that anyway before this diff (it depends on the opt settings and extent hooks in effect). --- include/jemalloc/internal/arena_externs.h | 9 +- .../jemalloc/internal/jemalloc_internal_externs.h | 2 + include/jemalloc/internal/large_externs.h | 8 +- include/jemalloc/internal/tcache_inlines.h | 35 +-- src/arena.c | 46 +--- src/jemalloc.c | 62 +++++ src/large.c | 53 +--- src/tcache.c | 8 +- test/unit/junk.c | 276 ++++++++++++--------- 9 files changed, 250 insertions(+), 249 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1b92766..4ef8d8e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -50,11 +50,6 @@ void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind); -void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, - bool zero); - -typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); -extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); @@ -63,9 +58,9 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); +bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, edata_t *edata, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index e9dbde8..338a590 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -14,6 +14,8 @@ extern bool opt_confirm_conf; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; +extern void (*junk_free_callback)(void *ptr, size_t size); +extern void (*junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool 
opt_xmalloc; extern bool opt_zero; diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 05e6c44..2797964 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -12,13 +12,7 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); -typedef void (large_dalloc_junk_t)(void *, size_t); -extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; - -typedef void (large_dalloc_maybe_junk_t)(void *, size_t); -extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; - -void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index d356181..ff06935 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -61,23 +61,9 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, usize = sz_index2size(binind); assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); } - - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], - false); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { - if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], true); - } + if (unlikely(zero)) { memset(ret, 0, usize); } - if (config_stats) { bin->tstats.nrequests++; } @@ -119,16 +105,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(usize <= tcache_maxclass); } - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - memset(ret, JEMALLOC_ALLOC_JUNK, - usize); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { + if (unlikely(zero)) { memset(ret, 0, usize); } @@ -148,10 +125,6 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); - if (slow_path && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, &bin_infos[binind]); - } - bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_ncached_max_get(binind) >> 1; @@ -170,10 +143,6 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - if (slow_path && config_fill && unlikely(opt_junk_free)) { - large_dalloc_junk(ptr, sz_index2size(binind)); - } - bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_ncached_max_get(binind) >> 1; diff --git a/src/arena.c b/src/arena.c index aa19e09..0a9e4a9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1446,30 +1446,10 @@ label_refill: fresh_slab = NULL; } - if (config_fill && unlikely(opt_junk_alloc)) { - for (unsigned i = 0; i < filled; i++) { - void *ptr = *(empty_position - nfill + filled + i); - arena_alloc_junk_small(ptr, bin_info, true); - } - } cache_bin_ncached_set(tbin, binind, filled); arena_decay_tick(tsdn, arena); } -void 
-arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { - if (!zero) { - memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); - } -} - -static void -arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { - memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); -} -arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = - arena_dalloc_junk_small_impl; - /* * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill * bin->slabcur if necessary. @@ -1528,18 +1508,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (fresh_slab != NULL) { arena_slab_dalloc(tsdn, arena, fresh_slab); } - if (!zero) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, bin_info, false); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { - if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, bin_info, true); - } + if (zero) { memset(ret, 0, usize); } arena_decay_tick(tsdn, arena); @@ -1706,11 +1675,8 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { /* Returns true if arena_slab_dalloc must be called on slab */ static bool arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *slab, void *ptr, bool junked) { + szind_t binind, edata_t *slab, void *ptr) { const bin_info_t *bin_info = &bin_infos[binind]; - if (!junked && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, bin_info); - } arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); bool ret = false; @@ -1733,10 +1699,10 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } bool -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr) { +arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, +szind_t binind, edata_t *edata, void *ptr) { return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr, true); + ptr); } static void @@ -1747,7 +1713,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr, false); + ptr); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { diff --git a/src/jemalloc.c b/src/jemalloc.c index b29ae47..12b4f6c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -81,6 +81,24 @@ const char *zero_realloc_mode_names[] = { "abort", }; +/* + * These are the documented values for junk fill debugging facilities -- see the + * man page. 
+ */ +static const uint8_t junk_alloc_byte = 0xa5; +static const uint8_t junk_free_byte = 0x5a; + +static void default_junk_alloc(void *ptr, size_t usize) { + memset(ptr, junk_alloc_byte, usize); +} + +static void default_junk_free(void *ptr, size_t usize) { + memset(ptr, junk_free_byte, usize); +} + +void (*junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; +void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; + bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; @@ -2210,6 +2228,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(usize == isalloc(tsd_tsdn(tsd), allocation)); + if (config_fill && sopts->slow && !dopts->zero) { + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(allocation, usize); + } else if (unlikely(opt_zero)) { + memset(allocation, 0, usize); + } + } + if (sopts->slow) { UTRACE(0, size, allocation); } @@ -2582,6 +2608,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { + if (config_fill && slow_path && opt_junk_free) { + junk_free_callback(ptr, usize); + } idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } @@ -2648,6 +2677,9 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, false); } else { + if (config_fill && slow_path && opt_junk_free) { + junk_free_callback(ptr, usize); + } isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, true); } @@ -2745,6 +2777,14 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + + /* + * If junking were enabled, this is where we would do it. It's not + * though, since we ensured above that we're on the fast path. Assert + * that to double-check. 
+ */ + assert(!opt_junk_free); + if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } @@ -3180,6 +3220,16 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + if (config_fill && malloc_slow && !zero && usize > old_usize) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)p + old_usize); + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(excess_start, excess_len); + } else if (unlikely(opt_zero)) { + memset(excess_start, 0, excess_len); + } + } + return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -3465,6 +3515,18 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { goto label_not_resized; } thread_dalloc_event(tsd, old_usize); + + if (config_fill && malloc_slow) { + if (usize > old_usize && !zero) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)ptr + old_usize); + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(excess_start, excess_len); + } else if (unlikely(opt_zero)) { + memset(excess_start, 0, excess_len); + } + } + } label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; diff --git a/src/large.c b/src/large.c index f13b1e5..babb307 100644 --- a/src/large.c +++ b/src/large.c @@ -38,8 +38,8 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } /* * Copy zero into is_zeroed and pass the copy when allocating the - * extent, so that it is possible to make correct junk/zero fill - * decisions below, even if is_zeroed ends up true when zero is false. + * extent, so that it is possible to make correct zero fill decisions + * below, even if is_zeroed ends up true when zero is false. */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { @@ -60,36 +60,12 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (zero) { assert(is_zeroed); - } else if (config_fill && unlikely(opt_junk_alloc)) { - memset(edata_addr_get(edata), JEMALLOC_ALLOC_JUNK, - edata_usize_get(edata)); } arena_decay_tick(tsdn, arena); return edata_addr_get(edata); } -static void -large_dalloc_junk_impl(void *ptr, size_t size) { - memset(ptr, JEMALLOC_FREE_JUNK, size); -} -large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk = large_dalloc_junk_impl; - -static void -large_dalloc_maybe_junk_impl(void *ptr, size_t size) { - if (config_fill && have_dss && unlikely(opt_junk_free)) { - /* - * Only bother junk filling if the extent isn't about to be - * unmapped. - */ - if (opt_retain || (have_dss && extent_in_dss(ptr))) { - large_dalloc_junk(ptr, size); - } - } -} -large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = - large_dalloc_maybe_junk_impl; - static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { arena_t *arena = arena_get_from_edata(edata); @@ -112,11 +88,6 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - if (config_fill && unlikely(opt_junk_free)) { - large_dalloc_maybe_junk(edata_addr_get(trail), - edata_size_get(trail)); - } - arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } @@ -142,9 +113,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } /* * Copy zero into is_zeroed_trail and pass the copy when allocating the - * extent, so that it is possible to make correct junk/zero fill - * decisions below, even if is_zeroed_trail ends up true when zero is - * false. 
+ * extent, so that it is possible to make correct zero fill decisions + * below, even if is_zeroed_trail ends up true when zero is false. */ bool is_zeroed_trail = zero; edata_t *trail; @@ -201,11 +171,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, memset(zbase, 0, nzero); } assert(is_zeroed_trail); - } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)edata_addr_get(edata) + oldusize), - JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); return false; @@ -310,21 +276,18 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, } /* - * junked_locked indicates whether the extent's data have been junk-filled, and - * whether the arena's large_mtx is currently held. + * locked indicates whether the arena's large_mtx is currently held. */ static void large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - bool junked_locked) { - if (!junked_locked) { + bool locked) { + if (!locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); edata_list_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - large_dalloc_maybe_junk(edata_addr_get(edata), - edata_usize_get(edata)); } else { /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { @@ -342,7 +305,7 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { } void -large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata) { +large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata) { large_dalloc_prep_impl(tsdn, arena_get_from_edata(edata), edata, true); } diff --git a/src/tcache.c b/src/tcache.c index 7ffa6fc..c736f56 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -176,7 +176,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - + /* Look up edata once per item. 
*/ if (config_opt_safety_checks) { tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, @@ -262,7 +262,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (tcache_bin_flush_match(edata, cur_arena_ind, cur_binshard, small)) { - large_dalloc_prep_junked_locked(tsdn, + large_dalloc_prep_locked(tsdn, edata); } } @@ -291,8 +291,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, continue; } if (small) { - if (arena_dalloc_bin_junked_locked(tsdn, - cur_arena, cur_bin, binind, edata, ptr)) { + if (arena_dalloc_bin_locked(tsdn, cur_arena, + cur_bin, binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; dalloc_count++; } diff --git a/test/unit/junk.c b/test/unit/junk.c index 772a0b4..5a74c3d 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,141 +1,191 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/util.h" - -static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; -static large_dalloc_junk_t *large_dalloc_junk_orig; -static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig; -static void *watch_for_junking; -static bool saw_junking; +#define arraylen(arr) (sizeof(arr)/sizeof(arr[0])) +static size_t ptr_ind; +static void *volatile ptrs[100]; +static void *last_junked_ptr; +static size_t last_junked_usize; static void -watch_junking(void *p) { - watch_for_junking = p; - saw_junking = false; +reset() { + ptr_ind = 0; + last_junked_ptr = NULL; + last_junked_usize = 0; } static void -arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { - size_t i; - - arena_dalloc_junk_small_orig(ptr, bin_info); - for (i = 0; i < bin_info->reg_size; i++) { - expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, - "Missing junk fill for byte %zu/%zu of deallocated region", - i, bin_info->reg_size); - } - if (ptr == watch_for_junking) { - saw_junking = true; - } +test_junk(void *ptr, size_t usize) { + last_junked_ptr = ptr; + last_junked_usize = usize; } static void -large_dalloc_junk_intercept(void *ptr, size_t usize) { - size_t i; - - large_dalloc_junk_orig(ptr, usize); - for (i = 0; i < usize; i++) { - expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, - "Missing junk fill for byte %zu/%zu of deallocated region", - i, usize); +do_allocs(size_t size, bool zero, size_t lg_align) { +#define JUNK_ALLOC(...) 
\ + do { \ + assert(ptr_ind + 1 < arraylen(ptrs)); \ + void *ptr = __VA_ARGS__; \ + assert_ptr_not_null(ptr, ""); \ + ptrs[ptr_ind++] = ptr; \ + if (opt_junk_alloc && !zero) { \ + expect_ptr_eq(ptr, last_junked_ptr, ""); \ + expect_zu_eq(last_junked_usize, \ + malloc_usable_size(ptr), ""); \ + } \ + } while (0) + if (!zero && lg_align == 0) { + JUNK_ALLOC(malloc(size)); } - if (ptr == watch_for_junking) { - saw_junking = true; + if (!zero) { + JUNK_ALLOC(aligned_alloc(1 << lg_align, size)); } -} - -static void -large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) { - large_dalloc_maybe_junk_orig(ptr, usize); - if (ptr == watch_for_junking) { - saw_junking = true; +#ifdef JEMALLOC_OVERRIDE_MEMALIGN + if (!zero) { + JUNK_ALLOC(je_memalign(1 << lg_align, size)); } -} - -static void -test_junk(size_t sz_min, size_t sz_max) { - uint8_t *s; - size_t sz_prev, sz, i; - - if (opt_junk_free) { - arena_dalloc_junk_small_orig = arena_dalloc_junk_small; - arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; - large_dalloc_junk_orig = large_dalloc_junk; - large_dalloc_junk = large_dalloc_junk_intercept; - large_dalloc_maybe_junk_orig = large_dalloc_maybe_junk; - large_dalloc_maybe_junk = large_dalloc_maybe_junk_intercept; +#endif +#ifdef JEMALLOC_OVERRIDE_VALLOC + if (!zero && lg_align == LG_PAGE) { + JUNK_ALLOC(je_valloc(size)); } +#endif + int zero_flag = zero ? MALLOCX_ZERO : 0; + JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align))); + JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align) + | MALLOCX_TCACHE_NONE)); + if (lg_align >= LG_SIZEOF_PTR) { + void *memalign_result; + int err = posix_memalign(&memalign_result, (1 << lg_align), + size); + assert_d_eq(err, 0, ""); + JUNK_ALLOC(memalign_result); + } +} - sz_prev = 0; - s = (uint8_t *)mallocx(sz_min, 0); - expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - - for (sz = sallocx(s, 0); sz <= sz_max; - sz_prev = sz, sz = sallocx(s, 0)) { - if (sz_prev > 0) { - expect_u_eq(s[0], 'a', - "Previously allocated byte %zu/%zu is corrupted", - ZU(0), sz_prev); - expect_u_eq(s[sz_prev-1], 'a', - "Previously allocated byte %zu/%zu is corrupted", - sz_prev-1, sz_prev); - } - - for (i = sz_prev; i < sz; i++) { - if (opt_junk_alloc) { - expect_u_eq(s[i], JEMALLOC_ALLOC_JUNK, - "Newly allocated byte %zu/%zu isn't " - "junk-filled", i, sz); - } - s[i] = 'a'; - } - - if (xallocx(s, sz+1, 0, 0) == sz) { - uint8_t *t; - watch_junking(s); - t = (uint8_t *)rallocx(s, sz+1, 0); - expect_ptr_not_null((void *)t, - "Unexpected rallocx() failure"); - expect_zu_ge(sallocx(t, 0), sz+1, - "Unexpectedly small rallocx() result"); - if (!background_thread_enabled()) { - expect_ptr_ne(s, t, - "Unexpected in-place rallocx()"); - expect_true(!opt_junk_free || saw_junking, - "Expected region of size %zu to be " - "junk-filled", sz); +TEST_BEGIN(test_junk_alloc_free) { + bool zerovals[] = {false, true}; + size_t sizevals[] = { + 1, 8, 100, 1000, 100*1000 + /* + * Memory allocation failure is a real possibility in 32-bit mode. + * Rather than try to check in the face of resource exhaustion, we just + * rely more on the 64-bit tests. This is a little bit white-box-y in + * the sense that this is only a good test strategy if we know that the + * junk pathways don't touch interact with the allocation selection + * mechanisms; but this is in fact the case. 
+ */ +#if LG_SIZEOF_PTR == 3 + , 10 * 1000 * 1000 +#endif + }; + size_t lg_alignvals[] = { + 0, 4, 10, 15, 16, LG_PAGE +#if LG_SIZEOF_PTR == 3 + , 20, 24 +#endif + }; + +#define JUNK_FREE(...) \ + do { \ + do_allocs(size, zero, lg_align); \ + for (size_t n = 0; n < ptr_ind; n++) { \ + void *ptr = ptrs[n]; \ + __VA_ARGS__; \ + if (opt_junk_free) { \ + assert_ptr_eq(ptr, last_junked_ptr, \ + ""); \ + assert_zu_eq(usize, last_junked_usize, \ + ""); \ + } \ + reset(); \ + } \ + } while (0) + for (size_t i = 0; i < arraylen(zerovals); i++) { + for (size_t j = 0; j < arraylen(sizevals); j++) { + for (size_t k = 0; k < arraylen(lg_alignvals); k++) { + bool zero = zerovals[i]; + size_t size = sizevals[j]; + size_t lg_align = lg_alignvals[k]; + size_t usize = nallocx(size, + MALLOCX_LG_ALIGN(lg_align)); + + JUNK_FREE(free(ptr)); + JUNK_FREE(dallocx(ptr, 0)); + JUNK_FREE(dallocx(ptr, MALLOCX_TCACHE_NONE)); + JUNK_FREE(dallocx(ptr, MALLOCX_LG_ALIGN( + lg_align))); + JUNK_FREE(sdallocx(ptr, usize, MALLOCX_LG_ALIGN( + lg_align))); + JUNK_FREE(sdallocx(ptr, usize, + MALLOCX_TCACHE_NONE | MALLOCX_LG_ALIGN(lg_align))); + if (opt_zero_realloc_action + == zero_realloc_action_free) { + JUNK_FREE(realloc(ptr, 0)); + } } - s = t; } } - - watch_junking(s); - dallocx(s, 0); - expect_true(!opt_junk_free || saw_junking, - "Expected region of size %zu to be junk-filled", sz); - - if (opt_junk_free) { - arena_dalloc_junk_small = arena_dalloc_junk_small_orig; - large_dalloc_junk = large_dalloc_junk_orig; - large_dalloc_maybe_junk = large_dalloc_maybe_junk_orig; - } -} - -TEST_BEGIN(test_junk_small) { - test_skip_if(!config_fill); - test_junk(1, SC_SMALL_MAXCLASS - 1); } TEST_END -TEST_BEGIN(test_junk_large) { - test_skip_if(!config_fill); - test_junk(SC_SMALL_MAXCLASS + 1, (1U << (SC_LG_LARGE_MINCLASS + 1))); +TEST_BEGIN(test_realloc_expand) { + char *volatile ptr; + char *volatile expanded; + + test_skip_if(!opt_junk_alloc); + + /* Realloc */ + ptr = malloc(SC_SMALL_MAXCLASS); + expanded = realloc(ptr, SC_LARGE_MINCLASS); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., 0) */ + ptr = malloc(SC_SMALL_MAXCLASS); + expanded = rallocx(ptr, SC_LARGE_MINCLASS, 0); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., nonzero) */ + ptr = malloc(SC_SMALL_MAXCLASS); + expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., MALLOCX_ZERO) */ + ptr = malloc(SC_SMALL_MAXCLASS); + last_junked_ptr = (void *)-1; + last_junked_usize = (size_t)-1; + expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_ZERO); + expect_ptr_eq(last_junked_ptr, (void *)-1, ""); + expect_zu_eq(last_junked_usize, (size_t)-1, ""); + free(expanded); + + /* + * Unfortunately, testing xallocx reliably is difficult to do portably + * (since allocations can be expanded / not expanded differently on + * different platforms. We rely on manual inspection there -- the + * xallocx pathway is easy to inspect, though. + * + * Likewise, we don't test the shrinking pathways. It's difficult to do + * so consistently (because of the risk of split failure or memory + * exhaustion, in which case no junking should happen). 
This is fine + * -- junking is a best-effort debug mechanism in the first place. + */ } TEST_END int main(void) { + junk_alloc_callback = &test_junk; + junk_free_callback = &test_junk; return test( - test_junk_small, - test_junk_large); + test_junk_alloc_free, + test_realloc_expand); } -- cgit v0.12 From 909c501b07c101890c264fd717b0bf8b5cf27156 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 25 Feb 2020 12:14:48 -0800 Subject: Cache_bin: Shouldn't know about tcache. Instead, have it take the cache_bin_info_ts to use by pointer. While we're here, add a src file for the cache bin. --- Makefile.in | 1 + include/jemalloc/internal/cache_bin.h | 58 ++++++++++++---------- include/jemalloc/internal/tcache_externs.h | 2 + include/jemalloc/internal/tcache_inlines.h | 12 +++-- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/arena.c | 18 ++++--- src/cache_bin.c | 3 ++ src/tcache.c | 33 +++++++----- test/unit/cache_bin.c | 33 ++++++------ 12 files changed, 101 insertions(+), 67 deletions(-) create mode 100644 src/cache_bin.c diff --git a/Makefile.in b/Makefile.in index 984bd72..b53846d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,6 +102,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/buf_writer.c \ + $(srcroot)src/cache_bin.c \ $(srcroot)src/ckh.c \ $(srcroot)src/counter.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 60feb15..ec2fdf4 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,7 +35,6 @@ struct cache_bin_info_s { /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */ cache_bin_sz_t stack_size; }; -extern cache_bin_info_t *tcache_bin_info; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { @@ -115,29 +114,29 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t -cache_bin_ncached_max_get(szind_t ind) { - return tcache_bin_info[ind].stack_size / sizeof(void *); +cache_bin_ncached_max_get(szind_t ind, cache_bin_info_t *infos) { + return infos[ind].stack_size / sizeof(void *); } static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = (cache_bin_sz_t)((tcache_bin_info[ind].stack_size + +cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { + cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind, infos)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; } static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); +cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind, infos); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); /* Can also be computed via (full_position + ncached_max) | highbits. 
*/ - uintptr_t lowbits = bin->full_position + - tcache_bin_info[ind].stack_size; + uintptr_t lowbits = bin->full_position + infos[ind].stack_size; uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & ~(((uint64_t)1 << 32) - 1); assert(ret == (void **)(lowbits | highbits)); @@ -147,32 +146,35 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { /* Returns the position of the bottom item on the stack; for convenience. */ static inline void ** -cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { - void **bottom = cache_bin_empty_position_get(bin, ind) - 1; - assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL); +cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + void **bottom = cache_bin_empty_position_get(bin, ind, infos) - 1; + assert(cache_bin_ncached_get(bin, ind, infos) == 0 || *bottom != NULL); return bottom; } /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); +cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind, infos); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); assert(low_water <= ncached_max); - assert(low_water <= cache_bin_ncached_get(bin, ind)); + assert(low_water <= cache_bin_ncached_get(bin, ind, infos)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); return low_water; } static inline void -cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { - bin->cur_ptr.lowbits = bin->full_position + - tcache_bin_info[ind].stack_size - n * sizeof(void *); - assert(n <= cache_bin_ncached_max_get(ind)); +cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, + cache_bin_info_t *infos) { + bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size + - n * sizeof(void *); + assert(n <= cache_bin_ncached_max_get(ind, infos)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -188,7 +190,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, - const bool adjust_low_water) { + cache_bin_info_t *infos, const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -197,14 +199,14 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, /* * Check for both bin->ncached == 0 and ncached < low_water in a single * branch. When adjust_low_water is true, this also avoids accessing - * tcache_bin_info (which is on a separate cacheline / page) in the - * common case. + * the cache_bin_info_ts (which is on a separate cacheline / page) in + * the common case. */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { if (adjust_low_water) { assert(ind != INVALID_SZIND); uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].stack_size; + infos[ind].stack_size; if (unlikely(bin->cur_ptr.lowbits > empty_position)) { /* Over-allocated; revert. */ bin->cur_ptr.ptr--; @@ -237,12 +239,14 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* The szind parameter won't be used. 
*/ - return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false); + return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, + /* infos */ NULL, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { - return cache_bin_alloc_easy_impl(bin, success, ind, true); +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind, + cache_bin_info_t *infos) { + return cache_bin_alloc_easy_impl(bin, success, ind, infos, true); } #undef INVALID_SZIND diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index db6f98b..c5c8f48 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -13,6 +13,8 @@ extern unsigned nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; +extern cache_bin_info_t *tcache_bin_info; + /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index ff06935..dc6da94 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,7 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind, + tcache_bin_info); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -79,7 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind, + tcache_bin_info); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -127,7 +129,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + unsigned remain = cache_bin_ncached_max_get(binind, + tcache_bin_info) >> 1; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -145,7 +148,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + unsigned remain = cache_bin_ncached_max_get(binind, + tcache_bin_info) >> 1; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d98bb85..920d55e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -42,6 +42,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index fd3e11c..fe77170 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -25,6 +25,9 @@ Source Files + + Source 
Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b59d411..2db9401 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -42,6 +42,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index fd3e11c..fe77170 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -25,6 +25,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index 0a9e4a9..5ca884b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -200,13 +200,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i) * sz_index2size(i)); + cache_bin_ncached_get(tbin, i, tcache_bin_info) + * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i + SC_NBINS) * - sz_index2size(i)); + cache_bin_ncached_get(tbin, i + SC_NBINS, + tcache_bin_info) * sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1320,13 +1321,14 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, binind) == 0); + assert(cache_bin_ncached_get(tbin, binind, tcache_bin_info) == 0); tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_ncached_max_get(binind) >> - tcache->lg_fill_div[binind]; - void **empty_position = cache_bin_empty_position_get(tbin, binind); + const unsigned nfill = cache_bin_ncached_max_get(binind, + tcache_bin_info) >> tcache->lg_fill_div[binind]; + void **empty_position = cache_bin_empty_position_get(tbin, binind, + tcache_bin_info); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull @@ -1446,7 +1448,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_ncached_set(tbin, binind, filled); + cache_bin_ncached_set(tbin, binind, filled, tcache_bin_info); arena_decay_tick(tsdn, arena); } diff --git a/src/cache_bin.c b/src/cache_bin.c new file mode 100644 index 0000000..454cb47 --- /dev/null +++ b/src/cache_bin.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + diff --git a/src/tcache.c b/src/tcache.c index c736f56..62905f1 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -59,8 +59,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { is_small = false; } - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind, + tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, + tcache_bin_info); if (low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. @@ -73,8 +75,8 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
*/ - if ((cache_bin_ncached_max_get(binind) >> - (tcache->lg_fill_div[binind] + 1)) >= 1) { + if ((cache_bin_ncached_max_get(binind, tcache_bin_info) + >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } } else { @@ -107,7 +109,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, binind); + ret = cache_bin_alloc_easy(tbin, tcache_success, binind, + tcache_bin_info); return ret; } @@ -126,7 +129,8 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * builds, avoid the branch in the loop. */ size_t szind_sum = binind * nflush; - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind, + tcache_bin_info); for (unsigned i = 0 ; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, @@ -164,7 +168,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, + tcache_bin_info); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); @@ -175,7 +180,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * touched (it's just included to satisfy the no-zero-length rule). */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind, + tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { @@ -340,7 +346,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem); + cache_bin_ncached_set(tbin, binind, rem, tcache_bin_info); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } @@ -445,8 +451,9 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)full_position; assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, ind) == 0); - assert(cache_bin_empty_position_get(bin, ind) == empty_position); + assert(cache_bin_ncached_get(bin, ind, tcache_bin_info) == 0); + assert(cache_bin_empty_position_get(bin, ind, tcache_bin_info) + == empty_position); return false; } @@ -605,8 +612,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. 
*/ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, 0) == 0); - assert(cache_bin_empty_position_get(bin, 0) == + assert(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0); + assert(cache_bin_empty_position_get(bin, 0, tcache_bin_info) == bin->cur_ptr.ptr); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index f98a92c..5ef108d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,48 +10,51 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0, + tcache_bin_info); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, - "Incorrect empty position"); + expect_ptr_eq(cache_bin_empty_position_get(bin, 0, tcache_bin_info), + empty_position, "Incorrect empty position"); /* Not using expect_zu etc on cache_bin_sz_t since it may change. */ - expect_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + expect_true(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0, + "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, 0); + void *ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, 0) == 0, + expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, 0); + cache_bin_ncached_set(bin, 0, 0, tcache_bin_info); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, 0) == i, - "Bin dalloc failure"); + expect_true(success && cache_bin_ncached_get(bin, 0, + tcache_bin_info) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, 0, ncached_max); + cache_bin_ncached_set(bin, 0, ncached_max, tcache_bin_info); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, 0); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); + ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0, + tcache_bin_info); expect_true(success && ncached == i - 1, "Cache bin alloc failure"); expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, 0) == ncached, - "Incorrect low water mark"); + expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) + == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, 0); + ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); -- cgit v0.12 From da68f7329666a4375e9df04a0f441bb9ae2b4d6c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 25 Feb 2020 12:18:51 -0800 Subject: Move percpu_arena_update. It's not really part of the API of the arena; it changes which arena we're using that API on. --- include/jemalloc/internal/arena_inlines_a.h | 22 --------------------- .../internal/jemalloc_internal_inlines_b.h | 23 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 27434c3..b83d0e8 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -21,26 +21,4 @@ arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -static inline void -percpu_arena_update(tsd_t *tsd, unsigned cpu) { - assert(have_percpu_arena); - arena_t *oldarena = tsd_arena_get(tsd); - assert(oldarena != NULL); - unsigned oldind = arena_ind_get(oldarena); - - if (oldind != cpu) { - unsigned newind = cpu; - arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); - assert(newarena != NULL); - - /* Set new arena/tcache associations. */ - arena_migrate(tsd, oldind, newind); - tcache_t *tcache = tcache_get(tsd); - if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); - } - } -} - #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index fc526c4..3a0bfc6 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -3,6 +3,29 @@ #include "jemalloc/internal/extent.h" +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + /* Set new arena/tcache associations. */ + arena_migrate(tsd, oldind, newind); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); + } + } +} + + /* Choose an arena based on a per-thread value. 
*/ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { -- cgit v0.12 From b66c0973cc7811498a97783283c8ef06f83d6b9f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:10:12 -0800 Subject: cache_bin: Don't allow direct internals access. --- include/jemalloc/internal/cache_bin.h | 38 ++++++++++++++++++++++++++--------- src/tcache.c | 26 ++++++++++++------------ 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index ec2fdf4..2309204 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -144,16 +144,6 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, return ret; } -/* Returns the position of the bottom item on the stack; for convenience. */ -static inline void ** -cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - void **bottom = cache_bin_empty_position_get(bin, ind, infos) - 1; - assert(cache_bin_ncached_get(bin, ind, infos) == 0 || *bottom != NULL); - - return bottom; -} - /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, @@ -263,4 +253,32 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } +typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; +struct cache_bin_ptr_array_s { + cache_bin_sz_t nflush; + void **ptr; +}; + +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nflush_val) \ + cache_bin_ptr_array_t name; \ + name.nflush = (nflush_val) + +static inline void +cache_bin_ptr_array_init(cache_bin_ptr_array_t *arr, cache_bin_t *bin, + cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { + arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; + assert(cache_bin_ncached_get(bin, ind, infos) == 0 + || *arr->ptr != NULL); +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { + return *(arr->ptr - n); +} + +JEMALLOC_ALWAYS_INLINE void +cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { + *(arr->ptr - n) = p; +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/tcache.c b/src/tcache.c index 62905f1..4096b05 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -117,8 +117,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - size_t nflush, edata_t **edatas) { +tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, + szind_t binind, size_t nflush, edata_t **edatas) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); @@ -129,15 +129,14 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * builds, avoid the branch in the loop. 
*/ size_t szind_sum = binind * nflush; - void **bottom_item = cache_bin_bottom_item_get(tbin, binind, - tcache_bin_info); - for (unsigned i = 0 ; i < nflush; i++) { + for (unsigned i = 0; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i), &full_alloc_ctx); + cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; } + if (szind_sum != 0) { safety_check_fail_sized_dealloc(false); } @@ -180,17 +179,18 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * touched (it's just included to satisfy the no-zero-length rule). */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind, - tcache_bin_info); + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); + + cache_bin_ptr_array_init(&ptrs, tbin, nflush, binind, tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, + tbin_edatas_lookup_size_check(tsd, &ptrs, binind, nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, *(bottom_item - i)); + &emap_global, cache_bin_ptr_array_get(&ptrs, i)); } } @@ -262,7 +262,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); + void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); @@ -280,7 +280,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, /* Deallocate whatever we can. */ unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); + void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -291,7 +291,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * arena. Either way, stash the object so that * it can be handled in a future pass. */ - *(bottom_item - ndeferred) = ptr; + cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); item_edata[ndeferred] = edata; ndeferred++; continue; -- cgit v0.12 From 74d36d78efdea846d577dea933e4bb06a18efa10 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:23:47 -0800 Subject: Cache bin: Make ncached_max a query on the info_t. --- include/jemalloc/internal/cache_bin.h | 10 +++++----- include/jemalloc/internal/tcache_inlines.h | 8 ++++---- src/arena.c | 4 ++-- src/tcache.c | 5 +++-- test/unit/cache_bin.c | 4 ++-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2309204..3f0524e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -114,15 +114,15 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t -cache_bin_ncached_max_get(szind_t ind, cache_bin_info_t *infos) { - return infos[ind].stack_size / sizeof(void *); +cache_bin_info_ncached_max(cache_bin_info_t *info) { + return info->stack_size / sizeof(void *); } static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_ncached_max_get(ind, infos)); + assert(n <= cache_bin_info_ncached_max(&infos[ind])); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -148,7 +148,7 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind, infos); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&infos[ind]); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); @@ -164,7 +164,7 @@ cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, cache_bin_info_t *infos) { bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size - n * sizeof(void *); - assert(n <= cache_bin_ncached_max_get(ind, infos)); + assert(n <= cache_bin_info_ncached_max(&infos[ind])); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index dc6da94..28d6e3c 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -129,8 +129,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> 1; + unsigned remain = cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> 1; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -148,8 +148,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> 1; + unsigned remain = cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> 1; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/arena.c b/src/arena.c index 5ca884b..2f8a03c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1325,8 +1325,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> tcache->lg_fill_div[binind]; + const unsigned nfill = cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; void **empty_position = cache_bin_empty_position_get(tbin, binind, tcache_bin_info); diff --git a/src/tcache.c b/src/tcache.c index 4096b05..d2442ef 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,8 +75,9 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
*/ - if ((cache_bin_ncached_max_get(binind, tcache_bin_info) - >> (tcache->lg_fill_div[binind] + 1)) >= 1) { + if ((cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> + (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } } else { diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 5ef108d..ab36a3a 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,8 +10,8 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0, - tcache_bin_info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max( + &tcache_bin_info[0]); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; -- cgit v0.12 From d303f30796f0aef7f7fc9d907ef240b93d3fc674 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:39:55 -0800 Subject: cache_bin nflush -> n. We're going to use it on the fill pathway as well. --- include/jemalloc/internal/cache_bin.h | 8 ++++---- src/tcache.c | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 3f0524e..1c67923 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -255,16 +255,16 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { - cache_bin_sz_t nflush; + cache_bin_sz_t n; void **ptr; }; -#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nflush_val) \ +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ cache_bin_ptr_array_t name; \ - name.nflush = (nflush_val) + name.n = (nval) static inline void -cache_bin_ptr_array_init(cache_bin_ptr_array_t *arr, cache_bin_t *bin, +cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; assert(cache_bin_ncached_get(bin, ind, infos) == 0 diff --git a/src/tcache.c b/src/tcache.c index d2442ef..3fc4ee6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -182,7 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init(&ptrs, tbin, nflush, binind, tcache_bin_info); + cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, binind, + tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { -- cgit v0.12 From 1b00d808d7bfb9ff41c643dcb32f96a078090932 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 27 Feb 2020 10:22:46 -0800 Subject: cache_bin: Don't let arena see empty position. 
--- include/jemalloc/internal/cache_bin.h | 20 ++++++++++++++++++++ src/arena.c | 20 ++++++++------------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 1c67923..775b71f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -271,6 +271,13 @@ cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, || *arr->ptr != NULL); } +static inline void +cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, + cache_bin_sz_t nfill, szind_t ind, cache_bin_info_t *infos) { + arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - nfill; + assert(cache_bin_ncached_get(bin, ind, infos) == 0); +} + JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { return *(arr->ptr - n); @@ -281,4 +288,17 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } +static inline void +cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + szind_t ind, szind_t nfilled, cache_bin_info_t *infos) { + assert(cache_bin_ncached_get(bin, ind, infos) == 0); + if (nfilled < arr->n) { + void **empty_position = cache_bin_empty_position_get(bin, ind, + infos); + memmove(empty_position - nfilled, empty_position - arr->n, + nfilled * sizeof(void *)); + } + cache_bin_ncached_set(bin, ind, nfilled, infos); +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/arena.c b/src/arena.c index 2f8a03c..6b5f1d3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1327,7 +1327,9 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, const bin_info_t *bin_info = &bin_infos[binind]; const unsigned nfill = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; - void **empty_position = cache_bin_empty_position_get(tbin, binind, + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); + cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, binind, tcache_bin_info); /* @@ -1374,7 +1376,7 @@ label_refill: unsigned cnt = tofill < nfree ? tofill : nfree; arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, - empty_position - tofill); + &ptrs.ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1403,16 +1405,9 @@ label_refill: break; } + /* OOM. */ + assert(fresh_slab == NULL); - /* - * OOM. tbin->avail isn't yet filled down to its first element, - * so the successful allocations (if any) must be moved just - * before tbin->avail before bailing out. - */ - if (filled > 0) { - memmove(empty_position - filled, empty_position - nfill, - filled * sizeof(void *)); - } assert(!alloc_and_retry); break; } /* while (filled < nfill) loop. */ @@ -1448,7 +1443,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_ncached_set(tbin, binind, filled, tcache_bin_info); + cache_bin_fill_from_ptr_array(tbin, &ptrs, binind, filled, + tcache_bin_info); arena_decay_tick(tsdn, arena); } -- cgit v0.12 From e1dcc557d68cfa1c7f1fab6c84a9e44e1d97e1d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 18:55:33 -0800 Subject: Cache bin: Only take the relevant cache_bin_info_t Previously, we took an array of cache_bin_info_ts and an index, and dereferenced ourselves. But infos for other cache_bins aren't relevant to any particular cache bin, so that should be the caller's job. 
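Concretely, the caller-side call shape changes roughly as sketched below
(illustrative only; ncached_sketch is a made-up wrapper, while
cache_bin_ncached_get and tcache_bin_info are the names actually touched by
this diff):

    /* Sketch: a tcache-side caller now picks out the relevant info itself. */
    static inline cache_bin_sz_t
    ncached_sketch(cache_bin_t *bin, szind_t binind) {
    	/* Old: cache_bin_ncached_get(bin, binind, tcache_bin_info); */
    	return cache_bin_ncached_get(bin, &tcache_bin_info[binind]);
    }

A caller that already holds a cache_bin_info_t * no longer needs to know which
array the info came from.
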
--- include/jemalloc/internal/cache_bin.h | 71 +++++++++++++----------------- include/jemalloc/internal/tcache_inlines.h | 8 ++-- src/arena.c | 16 +++---- src/tcache.c | 30 ++++++------- test/unit/cache_bin.c | 26 +++++------ 5 files changed, 70 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 775b71f..bae669d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -119,24 +119,23 @@ cache_bin_info_ncached_max(cache_bin_info_t *info) { } static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { - cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + +cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = (cache_bin_sz_t)((info->stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_info_ncached_max(&infos[ind])); + assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; } static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind, infos); +cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, info); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); /* Can also be computed via (full_position + ncached_max) | highbits. */ - uintptr_t lowbits = bin->full_position + infos[ind].stack_size; + uintptr_t lowbits = bin->full_position + info->stack_size; uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & ~(((uint64_t)1 << 32) - 1); assert(ret == (void **)(lowbits | highbits)); @@ -146,25 +145,24 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, /* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&infos[ind]); +cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); assert(low_water <= ncached_max); - assert(low_water <= cache_bin_ncached_get(bin, ind, infos)); + assert(low_water <= cache_bin_ncached_get(bin, info)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); return low_water; } static inline void -cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, - cache_bin_info_t *infos) { - bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size +cache_bin_ncached_set(cache_bin_t *bin, cache_bin_sz_t n, + cache_bin_info_t *info) { + bin->cur_ptr.lowbits = bin->full_position + info->stack_size - n * sizeof(void *); - assert(n <= cache_bin_info_ncached_max(&infos[ind])); + assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -176,11 +174,9 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins_large = bins_large; } -#define INVALID_SZIND ((szind_t)(unsigned)-1) - JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, - cache_bin_info_t *infos, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, + cache_bin_info_t *info, const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -194,9 +190,8 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { if (adjust_low_water) { - assert(ind != INVALID_SZIND); uint32_t empty_position = bin->full_position + - infos[ind].stack_size; + info->stack_size; if (unlikely(bin->cur_ptr.lowbits > empty_position)) { /* Over-allocated; revert. */ bin->cur_ptr.ptr--; @@ -206,7 +201,6 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, } bin->low_water_position = bin->cur_ptr.lowbits; } else { - assert(ind == INVALID_SZIND); bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; @@ -228,19 +222,15 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { - /* The szind parameter won't be used. */ - return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, - /* infos */ NULL, false); + /* We don't look at info if we're not adjusting low-water. 
*/ + return cache_bin_alloc_easy_impl(bin, success, NULL, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind, - cache_bin_info_t *infos) { - return cache_bin_alloc_easy_impl(bin, success, ind, infos, true); +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_info_t *info) { + return cache_bin_alloc_easy_impl(bin, success, info, true); } -#undef INVALID_SZIND - JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { @@ -265,17 +255,17 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { - arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; - assert(cache_bin_ncached_get(bin, ind, infos) == 0 + cache_bin_sz_t nflush, cache_bin_info_t *info) { + arr->ptr = cache_bin_empty_position_get(bin, info) - 1; + assert(cache_bin_ncached_get(bin, info) == 0 || *arr->ptr != NULL); } static inline void cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nfill, szind_t ind, cache_bin_info_t *infos) { - arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - nfill; - assert(cache_bin_ncached_get(bin, ind, infos) == 0); + cache_bin_sz_t nfill, cache_bin_info_t *info) { + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + assert(cache_bin_ncached_get(bin, info) == 0); } JEMALLOC_ALWAYS_INLINE void * @@ -290,15 +280,14 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { static inline void cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - szind_t ind, szind_t nfilled, cache_bin_info_t *infos) { - assert(cache_bin_ncached_get(bin, ind, infos) == 0); + szind_t nfilled, cache_bin_info_t *info) { + assert(cache_bin_ncached_get(bin, info) == 0); if (nfilled < arr->n) { - void **empty_position = cache_bin_empty_position_get(bin, ind, - infos); + void **empty_position = cache_bin_empty_position_get(bin, info); memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); } - cache_bin_ncached_set(bin, ind, nfilled, infos); + cache_bin_ncached_set(bin, nfilled, info); } #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 28d6e3c..1b157ba 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &tcache_success, + &tcache_bin_info[binind]); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &tcache_success, + &tcache_bin_info[binind]); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/arena.c b/src/arena.c index 6b5f1d3..ee357d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -200,14 
+200,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i, tcache_bin_info) + cache_bin_ncached_get(tbin, &tcache_bin_info[i]) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i + SC_NBINS, - tcache_bin_info) * sz_index2size(i)); + cache_bin_ncached_get(tbin, + &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1321,7 +1321,7 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, binind, tcache_bin_info) == 0); + assert(cache_bin_ncached_get(tbin, &tcache_bin_info[binind]) == 0); tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; @@ -1329,8 +1329,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, binind, - tcache_bin_info); + cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, + &tcache_bin_info[binind]); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull @@ -1443,8 +1443,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_fill_from_ptr_array(tbin, &ptrs, binind, filled, - tcache_bin_info); + cache_bin_fill_from_ptr_array(tbin, &ptrs, filled, + &tcache_bin_info[binind]); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index 3fc4ee6..b2d46c3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -59,10 +59,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { is_small = false; } - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind, - tcache_bin_info); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, - tcache_bin_info); + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, + &tcache_bin_info[binind]); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + &tcache_bin_info[binind]); if (low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. 
@@ -110,8 +110,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(tbin, tcache_success, + &tcache_bin_info[binind]); return ret; } @@ -168,8 +168,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, - tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); @@ -182,8 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, binind, - tcache_bin_info); + cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, + &tcache_bin_info[binind]); /* Look up edata once per item. */ if (config_opt_safety_checks) { @@ -348,7 +348,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem, tcache_bin_info); + cache_bin_ncached_set(tbin, rem, &tcache_bin_info[binind]); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } @@ -453,8 +453,8 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)full_position; assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, ind, tcache_bin_info) == 0); - assert(cache_bin_empty_position_get(bin, ind, tcache_bin_info) + assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0); + assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind]) == empty_position); return false; @@ -614,8 +614,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. */ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0); - assert(cache_bin_empty_position_get(bin, 0, tcache_bin_info) == + assert(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0); + assert(cache_bin_empty_position_get(bin, &tcache_bin_info[0]) == bin->cur_ptr.ptr); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index ab36a3a..a019ae7 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -16,45 +16,45 @@ TEST_BEGIN(test_cache_bin) { bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, 0, tcache_bin_info), + expect_ptr_eq(cache_bin_empty_position_get(bin, &tcache_bin_info[0]), empty_position, "Incorrect empty position"); /* Not using expect_zu etc on cache_bin_sz_t since it may change. 
*/ - expect_true(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0, + expect_true(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0, "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + void *ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) == 0, + expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, 0, tcache_bin_info); + cache_bin_ncached_set(bin, 0, &tcache_bin_info[0]); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, 0, - tcache_bin_info) == i, "Bin dalloc failure"); + expect_true(success && cache_bin_ncached_get(bin, + &tcache_bin_info[0]) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, 0, ncached_max, tcache_bin_info); + cache_bin_ncached_set(bin, ncached_max, &tcache_bin_info[0]); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. */ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, + &tcache_bin_info[0]); expect_true(success && ncached == i - 1, "Cache bin alloc failure"); expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) + expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); -- cgit v0.12 From ff6acc6ed503f9808efd74f9aca70ee201d9e87a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 19:12:07 -0800 Subject: Cache bin: simplify names and argument ordering. We always start with the cache bin, then its info (if necessary). 
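
For illustration only, and not part of the patch itself: a minimal sketch of the calling convention after this change, where the cache_bin_t always comes first, its cache_bin_info_t second, and output parameters last. The wrapper name below is hypothetical; the cache_bin_alloc_easy signature is the post-patch one shown in the diff that follows.

/* Hypothetical caller, shown only to illustrate the argument order. */
static void *
my_cache_bin_alloc_or_null(cache_bin_t *bin, cache_bin_info_t *info) {
	bool success;
	/* Bin first, then its info, then the output flag. */
	void *ret = cache_bin_alloc_easy(bin, info, &success);
	if (!success) {
		/* A real caller would fall back to the slow path here. */
		return NULL;
	}
	return ret;
}
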
--- include/jemalloc/internal/cache_bin.h | 61 ++++++++++++++++-------------- include/jemalloc/internal/tcache_inlines.h | 8 ++-- src/arena.c | 8 ++-- src/tcache.c | 10 ++--- test/unit/cache_bin.c | 10 ++--- 5 files changed, 50 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index bae669d..6895dca 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -158,8 +158,8 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { } static inline void -cache_bin_ncached_set(cache_bin_t *bin, cache_bin_sz_t n, - cache_bin_info_t *info) { +cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + info->stack_size - n * sizeof(void *); assert(n <= cache_bin_info_ncached_max(info)); @@ -175,8 +175,8 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, - cache_bin_info_t *info, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, + bool *success, const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -185,7 +185,7 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, /* * Check for both bin->ncached == 0 and ncached < low_water in a single * branch. When adjust_low_water is true, this also avoids accessing - * the cache_bin_info_ts (which is on a separate cacheline / page) in + * the cache_bin_info_t (which is on a separate cacheline / page) in * the common case. */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { @@ -223,12 +223,12 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, success, NULL, false); + return cache_bin_alloc_easy_impl(bin, NULL, success, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_info_t *info) { - return cache_bin_alloc_easy_impl(bin, success, info, true); +cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { + return cache_bin_alloc_easy_impl(bin, info, success, true); } JEMALLOC_ALWAYS_INLINE bool @@ -254,18 +254,35 @@ struct cache_bin_ptr_array_s { name.n = (nval) static inline void -cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nflush, cache_bin_info_t *info) { - arr->ptr = cache_bin_empty_position_get(bin, info) - 1; - assert(cache_bin_ncached_get(bin, info) == 0 - || *arr->ptr != NULL); +cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + assert(cache_bin_ncached_get(bin, info) == 0); } +/* + * While nfill in cache_bin_init_ptr_array_for_fill is the number we *intend* to + * fill, nfilled here is the number we actually filled (which may be less, in + * case of OOM. 
+ */ static inline void -cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nfill, cache_bin_info_t *info) { - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; +cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, szind_t nfilled) { assert(cache_bin_ncached_get(bin, info) == 0); + if (nfilled < arr->n) { + void **empty_position = cache_bin_empty_position_get(bin, info); + memmove(empty_position - nfilled, empty_position - arr->n, + nfilled * sizeof(void *)); + } + cache_bin_ncached_set(bin, info, nfilled); +} + +static inline void +cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { + arr->ptr = cache_bin_empty_position_get(bin, info) - 1; + assert(cache_bin_ncached_get(bin, info) == 0 + || *arr->ptr != NULL); } JEMALLOC_ALWAYS_INLINE void * @@ -278,16 +295,4 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } -static inline void -cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - szind_t nfilled, cache_bin_info_t *info) { - assert(cache_bin_ncached_get(bin, info) == 0); - if (nfilled < arr->n) { - void **empty_position = cache_bin_empty_position_get(bin, info); - memmove(empty_position - nfilled, empty_position - arr->n, - nfilled * sizeof(void *)); - } - cache_bin_ncached_set(bin, nfilled, info); -} - #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 1b157ba..2d31ad0 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], + &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], + &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/arena.c b/src/arena.c index ee357d7..7f7c27f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1329,9 +1329,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, - &tcache_bin_info[binind]); - + cache_bin_init_ptr_array_for_fill(tbin, &tcache_bin_info[binind], &ptrs, + nfill); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. 
After both are exhausted, new slabs will be allocated through @@ -1443,8 +1442,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_fill_from_ptr_array(tbin, &ptrs, filled, - &tcache_bin_info[binind]); + cache_bin_finish_fill(tbin, &tcache_bin_info[binind], &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index b2d46c3..3c6d5d7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -110,8 +110,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind], + tcache_success); return ret; } @@ -182,8 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, - &tcache_bin_info[binind]); + cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind], + &ptrs, nflush); /* Look up edata once per item. */ if (config_opt_safety_checks) { @@ -348,7 +348,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, rem, &tcache_bin_info[binind]); + cache_bin_ncached_set(tbin, &tcache_bin_info[binind], rem); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index a019ae7..37ebd30 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -23,12 +23,12 @@ TEST_BEGIN(test_cache_bin) { "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + void *ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); expect_false(success, "Empty cache bin should not alloc"); expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, &tcache_bin_info[0]); + cache_bin_ncached_set(bin, &tcache_bin_info[0], 0); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); @@ -39,12 +39,12 @@ TEST_BEGIN(test_cache_bin) { expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, ncached_max, &tcache_bin_info[0]); + cache_bin_ncached_set(bin, &tcache_bin_info[0], ncached_max); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); cache_bin_sz_t ncached = cache_bin_ncached_get(bin, &tcache_bin_info[0]); expect_true(success && ncached == i - 1, @@ -54,7 +54,7 @@ TEST_BEGIN(test_cache_bin) { == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); -- cgit v0.12 From 44529da8525ef811ea8cc7704ffa9910459656ce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 10:48:59 -0800 Subject: Cache-bin: Make flush modifications internal I.e. the tcache code just calls a cache-bin function to finish flush (and move pointers around, etc.). It doesn't directly access the cache-bin's owned memory any more. --- include/jemalloc/internal/cache_bin.h | 18 +++++++++++++++++- src/tcache.c | 8 ++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 6895dca..382883c 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -267,7 +267,7 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, */ static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, szind_t nfilled) { + cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { assert(cache_bin_ncached_get(bin, info) == 0); if (nfilled < arr->n) { void **empty_position = cache_bin_empty_position_get(bin, info); @@ -285,6 +285,10 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, || *arr->ptr != NULL); } +/* + * These accessors are used by the flush pathways -- they reverse ordinary flush + * ordering. 
+ */ JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { return *(arr->ptr - n); @@ -295,4 +299,16 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } +static inline void +cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { + unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; + memmove(bin->cur_ptr.ptr + nflushed, bin->cur_ptr.ptr, + rem * sizeof(void *)); + cache_bin_ncached_set(bin, info, rem); + if (bin->cur_ptr.lowbits > bin->low_water_position) { + bin->low_water_position = bin->cur_ptr.lowbits; + } +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/tcache.c b/src/tcache.c index 3c6d5d7..e718858 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -346,12 +346,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } } - memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * - sizeof(void *)); - cache_bin_ncached_set(tbin, &tcache_bin_info[binind], rem); - if (tbin->cur_ptr.lowbits > tbin->low_water_position) { - tbin->low_water_position = tbin->cur_ptr.lowbits; - } + cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs, + ncached - rem); } void -- cgit v0.12 From 60113dfe3b0fe89df5b9661ce27754a5a96cb070 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 14:41:47 -0800 Subject: Cache bin: Move in initialization code. --- include/jemalloc/internal/cache_bin.h | 32 ++++++++- src/cache_bin.c | 101 +++++++++++++++++++++++++++ src/tcache.c | 124 +++++++++++----------------------- 3 files changed, 170 insertions(+), 87 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 382883c..6ab6baa 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -14,7 +14,10 @@ * of the tcache at all. */ -/* The size in bytes of each cache bin stack. */ +/* + * The size in bytes of each cache bin stack. We also use this to indicate + * *counts* of individual objects. + */ typedef uint16_t cache_bin_sz_t; typedef struct cache_bin_stats_s cache_bin_stats_t; @@ -311,4 +314,31 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, } } +/* + * Initialize a cache_bin_info to represent up to the given number of items in + * the cache_bins it is associated with. + */ +void cache_bin_info_init(cache_bin_info_t *bin_info, + cache_bin_sz_t ncached_max); +/* + * Given an array of initialized cache_bin_info_ts, determine how big an + * allocation is required to initialize a full set of cache_bin_ts. + */ +void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment); + +/* + * Actually initialize some cache bins. Callers should allocate the backing + * memory indicated by a call to cache_bin_compute_alloc. They should then + * preincrement, call init once for each bin and info, and then call + * cache_bin_postincrement. *alloc_cur will then point immediately past the end + * of the allocation. 
+ */ +void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset); +void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, + size_t *cur_offset); + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/cache_bin.c b/src/cache_bin.c index 454cb47..260c1b7 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -1,3 +1,104 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/bit_util.h" + +void +cache_bin_info_init(cache_bin_info_t *info, + cache_bin_sz_t ncached_max) { + size_t stack_size = (size_t)ncached_max * sizeof(void *); + assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); + info->stack_size = (cache_bin_sz_t)stack_size; +} + +void +cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment) { + /* For the total bin stack region (per tcache), reserve 2 more slots so + * that + * 1) the empty position can be safely read on the fast path before + * checking "is_empty"; and + * 2) the cur_ptr can go beyond the empty position by 1 step safely on + * the fast path (i.e. no overflow). + */ + *size = sizeof(void *) * 2; + for (szind_t i = 0; i < ninfos; i++) { + *size += infos[i].stack_size; + } + + /* + * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * smaller sizes; also helps if the larger sizes don't get used at all. + * 2) On 32-bit the pointers won't be compressed; use minimal alignment. + */ + if (LG_SIZEOF_PTR < 3 || *size < PAGE) { + *alignment = PAGE; + } else { + /* + * Align pow2 to avoid overflow the cache bin compressed + * pointers. + */ + *alignment = pow2_ceil_zu(*size); + } +} + +void +cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, + size_t *cur_offset) { + if (config_debug) { + size_t computed_size; + size_t computed_alignment; + + /* Pointer should be as aligned as we asked for. */ + cache_bin_info_compute_alloc(infos, ninfos, &computed_size, + &computed_alignment); + assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); + + /* And that alignment should disallow overflow. */ + uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size); + assert((uint32_t)(uintptr_t)alloc < lowbits); + } + /* + * Leave a noticeable mark pattern on the boundaries, in case a bug + * starts leaking those. Make it look like the junk pattern but be + * distinct from it. + */ + uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk; + *cur_offset += sizeof(void *); +} + +void +cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, + size_t *cur_offset) { + /* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ + uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk; + *cur_offset += sizeof(void *); +} + + +void +cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, + size_t *cur_offset) { + assert(sizeof(bin->cur_ptr) == sizeof(void *)); + /* + * The full_position points to the lowest available space. Allocations + * will access the slots toward higher addresses (for the benefit of + * adjacent prefetch). 
+ */ + void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); + void *full_position = stack_cur; + uint32_t bin_stack_size = info->stack_size; + + *cur_offset += bin_stack_size; + void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); + + /* Init to the empty position. */ + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)full_position; + assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_empty_position_get(bin, info) == empty_position); +} diff --git a/src/tcache.c b/src/tcache.c index e718858..48f06b7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -14,16 +14,10 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; cache_bin_info_t *tcache_bin_info; -/* - * For the total bin stack region (per tcache), reserve 2 more slots so that 1) - * the empty position can be safely read on the fast path before checking - * "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step - * safely on the fast path (i.e. no overflow). - */ -static const unsigned total_stack_padding = sizeof(void *) * 2; /* Total stack size required (per tcache). Include the padding above. */ -static uint32_t total_stack_bytes; +static size_t tcache_bin_alloc_size; +static size_t tcache_bin_alloc_alignment; unsigned nhbins; size_t tcache_maxclass; @@ -430,43 +424,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { return false; } -static bool -tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { - assert(sizeof(bin->cur_ptr) == sizeof(void *)); - /* - * The full_position points to the lowest available space. Allocations - * will access the slots toward higher addresses (for the benefit of - * adjacent prefetch). - */ - void *full_position = (void *)*stack_cur; - uint32_t bin_stack_size = tcache_bin_info[ind].stack_size; - - *stack_cur += bin_stack_size; - void *empty_position = (void *)*stack_cur; - - /* Init to the empty position. */ - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)full_position; - assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0); - assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind]) - == empty_position); - - return false; -} - -/* Sanity check only. 
*/ -static bool -tcache_bin_lowbits_overflowable(void *ptr) { - uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes); - return lowbits < (uint32_t)(uintptr_t)ptr; -} - static void tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { - assert(!tcache_bin_lowbits_overflowable(avail_stack)); - memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->next_gc_bin = 0; tcache->arena = NULL; @@ -476,35 +435,25 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); unsigned i = 0; - uintptr_t stack_cur = (uintptr_t)avail_stack; + size_t cur_offset = 0; + cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack, + &cur_offset); for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); - tcache_bin_init(bin, i, &stack_cur); + cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + &cur_offset); } for (; i < nhbins; i++) { cache_bin_t *bin = tcache_large_bin_get(tcache, i); - tcache_bin_init(bin, i, &stack_cur); + cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + &cur_offset); } - + cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack, + &cur_offset); /* Sanity check that the whole stack is used. */ - size_t stack_offset = stack_cur - (uintptr_t)avail_stack; - assert(stack_offset + total_stack_padding == total_stack_bytes); -} - -static size_t -tcache_bin_stack_alignment (size_t size) { - /* - * 1) Align to at least PAGE, to minimize the # of TLBs needed by the - * smaller sizes; also helps if the larger sizes don't get used at all. - * 2) On 32-bit the pointers won't be compressed; use minimal alignment. - */ - if (LG_SIZEOF_PTR < 3 || size < PAGE) { - return PAGE; - } - /* Align pow2 to avoid overflow the cache bin compressed pointers. */ - return pow2_ceil_zu(size); + assert(cur_offset == tcache_bin_alloc_size); } /* Initialize auto tcache (embedded in TSD). */ @@ -512,8 +461,8 @@ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); - size_t alignment = tcache_bin_stack_alignment(total_stack_bytes); - size_t size = sz_sa2u(total_stack_bytes, alignment); + size_t alignment = tcache_bin_alloc_alignment; + size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -551,22 +500,29 @@ tsd_tcache_data_init(tsd_t *tsd) { /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { - size_t size = sizeof(tcache_t); + /* + * We place the cache bin stacks, then the tcache_t, then a pointer to + * the beginning of the whole allocation (for freeing). The makes sure + * the cache bins have the requested alignment. + */ + size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); - size_t stack_offset = size; - size += total_stack_bytes; - size_t alignment = tcache_bin_stack_alignment(size); - size = sz_sa2u(size, alignment); + size = sz_sa2u(size, tcache_bin_alloc_alignment); - tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); - if (tcache == NULL) { + void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment, + true, NULL, true, arena_get(TSDN_NULL, 0, true)); + if (mem == NULL) { return NULL; } + void *avail_array = mem; + tcache_t *tcache = (void *)((uintptr_t)avail_array + + tcache_bin_alloc_size); + void **head_ptr = (void *)((uintptr_t)avail_array + + tcache_bin_alloc_size + sizeof(tcache_t)); + tcache_init(tsd, tcache, avail_array); + *head_ptr = mem; - void *avail_array = (void *)((uintptr_t)tcache + - (uintptr_t)stack_offset); tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); @@ -617,8 +573,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { + /* See the comment at the top of tcache_create_explicit. */ + void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t)); /* Release both the tcache struct and avail array. */ - idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true); + idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true); } /* @@ -816,7 +774,6 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { return true; } unsigned i, ncached_max; - total_stack_bytes = 0; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { ncached_max = TCACHE_NSLOTS_SMALL_MIN; @@ -826,18 +783,13 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { } else { ncached_max = TCACHE_NSLOTS_SMALL_MAX; } - unsigned stack_size = ncached_max * sizeof(void *); - assert(stack_size < ((uint64_t)1 << - (sizeof(cache_bin_sz_t) * 8))); - tcache_bin_info[i].stack_size = stack_size; - total_stack_bytes += stack_size; + cache_bin_info_init(&tcache_bin_info[i], ncached_max); } for (; i < nhbins; i++) { - unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *); - tcache_bin_info[i].stack_size = stack_size; - total_stack_bytes += stack_size; + cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE); } - total_stack_bytes += total_stack_padding; + cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size, + &tcache_bin_alloc_alignment); return false; } -- cgit v0.12 From 7f5ebd211cd870e9c9a303e6145781bfca58e1bb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 15:07:38 -0800 Subject: Cache bin: set low-water internally. --- include/jemalloc/internal/cache_bin.h | 11 ++++++++++- src/tcache.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 6ab6baa..8629174 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -160,6 +160,15 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { return low_water; } +/* + * Indicates that the current cache bin position should be the low water mark + * going forward. 
+ */ +static inline void +cache_bin_low_water_set(cache_bin_t *bin) { + bin->low_water_position = bin->cur_ptr.lowbits; +} + static inline void cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t n) { @@ -289,7 +298,7 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, } /* - * These accessors are used by the flush pathways -- they reverse ordinary flush + * These accessors are used by the flush pathways -- they reverse ordinary array * ordering. */ JEMALLOC_ALWAYS_INLINE void * diff --git a/src/tcache.c b/src/tcache.c index 48f06b7..a059ecc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -89,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } tcache->bin_refilled[binind] = false; } - tbin->low_water_position = tbin->cur_ptr.lowbits; + cache_bin_low_water_set(tbin); tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) { -- cgit v0.12 From 370c1ea007e152a0f8ede3aad7f69c45d2397e54 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 14:14:08 -0800 Subject: Cache bin: Write the unit test in terms of the API I.e. stop allowing the unit test to have secret access to implementation internals. --- include/jemalloc/internal/cache_bin.h | 4 + test/unit/cache_bin.c | 241 ++++++++++++++++++++++++++-------- 2 files changed, 193 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 8629174..42504ed 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -169,6 +169,10 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_water_position = bin->cur_ptr.lowbits; } +/* + * This is an internal implementation detail -- users should only affect ncached + * via single-item pushes or batch fills. + */ static inline void cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t n) { diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 37ebd30..2623b38 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -1,63 +1,200 @@ #include "test/jemalloc_test.h" -cache_bin_t test_bin; +static void +do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, + cache_bin_sz_t nfill_succeed) { + bool success; + void *ptr; + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); + cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); + for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { + arr.ptr[i] = &ptrs[i]; + } + cache_bin_finish_fill(bin, info, &arr, nfill_succeed); + expect_true(cache_bin_ncached_get(bin, info) == nfill_succeed, ""); + cache_bin_low_water_set(bin); -TEST_BEGIN(test_cache_bin) { - cache_bin_t *bin = &test_bin; - assert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); - /* Page aligned to make sure lowbits not overflowable. */ - void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); - - expect_ptr_not_null(stack, "Unexpected mallocx failure"); - /* Initialize to empty; bin 0. 
*/ - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max( - &tcache_bin_info[0]); - void **empty_position = stack + ncached_max; - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, &tcache_bin_info[0]), - empty_position, "Incorrect empty position"); - /* Not using expect_zu etc on cache_bin_sz_t since it may change. */ - expect_true(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0, - "Incorrect cache size"); + for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { + ptr = cache_bin_alloc_easy(bin, info, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, (void *)&ptrs[i], + "Should pop in order filled"); + expect_true(cache_bin_low_water_get(bin, info) + == nfill_succeed - i - 1, ""); + } + expect_true(cache_bin_ncached_get(bin, info) == 0, ""); + expect_true(cache_bin_low_water_get(bin, info) == 0, ""); +} +static void +do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, cache_bin_sz_t nflush) { bool success; - void *ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, - "Incorrect low water mark"); - - cache_bin_ncached_set(bin, &tcache_bin_info[0], 0); - expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); - for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { - success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, - &tcache_bin_info[0]) == i, "Bin dalloc failure"); + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + + for (cache_bin_sz_t i = 0; i < nfill; i++) { + success = cache_bin_dalloc_easy(bin, &ptrs[i]); + expect_true(success, ""); + } + + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); + cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); + for (cache_bin_sz_t i = 0; i < nflush; i++) { + expect_ptr_eq(cache_bin_ptr_array_get(&arr, i), &ptrs[i], ""); } - success = cache_bin_dalloc_easy(bin, (void *)1); - expect_false(success, "Bin should be full"); - expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - - cache_bin_ncached_set(bin, &tcache_bin_info[0], ncached_max); - expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); - /* Emulate low water after refill. 
*/ - bin->low_water_position = bin->full_position; - for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, - &tcache_bin_info[0]); - expect_true(success && ncached == i - 1, - "Cache bin alloc failure"); - expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) - == ncached, "Incorrect low water mark"); + cache_bin_finish_flush(bin, info, &arr, nflush); + + expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); + while (cache_bin_ncached_get(bin, info) > 0) { + cache_bin_alloc_easy(bin, info, &success); } +} + +TEST_BEGIN(test_cache_bin) { + bool success; + void *ptr; + + cache_bin_t bin; + cache_bin_info_t info; + cache_bin_info_init(&info, TCACHE_NSLOTS_SMALL_MAX); + + size_t size; + size_t alignment; + cache_bin_info_compute_alloc(&info, 1, &size, &alignment); + void *mem = mallocx(size, MALLOCX_ALIGN(alignment)); + assert_ptr_not_null(mem, "Unexpected mallocx failure"); + + size_t cur_offset = 0; + cache_bin_preincrement(&info, 1, mem, &cur_offset); + cache_bin_init(&bin, &info, mem, &cur_offset); + cache_bin_postincrement(&info, 1, mem, &cur_offset); + + assert_zu_eq(cur_offset, size, "Should use all requested memory"); + + /* Initialize to empty; should then have 0 elements. */ + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&info); + expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); + + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_false(success, "Shouldn't successfully allocate when empty"); + expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); + + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_false(success, "Shouldn't successfully allocate when empty"); + expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); + + /* + * We allocate one more item than ncached_max, so we can test cache bin + * exhaustion. + */ + void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_ncached_get(&bin, &info) == i, ""); + success = cache_bin_dalloc_easy(&bin, &ptrs[i]); + expect_true(success, + "Should be able to dalloc into a non-full cache bin."); + expect_true(cache_bin_low_water_get(&bin, &info) == 0, + "Pushes and pops shouldn't change low water of zero."); + } + expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); + expect_false(success, "Shouldn't be able to dalloc into a full bin."); + + cache_bin_low_water_set(&bin); + + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i, ""); + /* + * This should fail -- the reduced version can't change low + * water. + */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i, ""); + + /* This should succeed, though. 
*/ + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], + "Alloc should pop in stack order"); + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i - 1, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i - 1, ""); + } + /* Now we're empty -- all alloc attempts should fail. */ + expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + + for (cache_bin_sz_t i = 0; i < ncached_max / 2; i++) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } + cache_bin_low_water_set(&bin); + + for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } + expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { + /* + * Size is bigger than low water -- the reduced version should + * succeed. + */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, &ptrs[i], ""); + } + /* But now, we've hit low-water. */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_false(success, ""); + expect_ptr_null(ptr, ""); + + /* We're going to test filling -- we must be empty to start. */ + while (cache_bin_ncached_get(&bin, &info)) { + cache_bin_alloc_easy(&bin, &info, &success); + expect_true(success, ""); + } + + /* Test fill. */ + /* Try to fill all, succeed fully. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, ncached_max); + /* Try to fill all, succeed partially. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, + ncached_max / 2); + /* Try to fill all, fail completely. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, 0); + + /* Try to fill some, succeed fully. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + ncached_max / 2); + /* Try to fill some, succeed partially. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + ncached_max / 2); + /* Try to fill some, fail completely. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - expect_false(success, "Empty cache bin should not alloc."); - expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, - "Bin should be empty"); + do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, &info, ptrs, ncached_max, 0); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); } TEST_END -- cgit v0.12 From 6a7aa46ef753108f9b0c065572abff14c33eb5d2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:07:19 -0800 Subject: Cache bin: Add a debug method for init checking. 
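
As a hedged illustration of the intended lifecycle (a sketch only, not part of the patch; the helper name is hypothetical, and it assumes the cache_bin_* setup API introduced earlier in this series plus jemalloc's public mallocx/dallocx, mirroring the unit test): a zero-initialized bin reports true from cache_bin_still_zero_initialized(), and reports false once cache_bin_init() has run.

/* Sketch: debug-only lifecycle check; error handling kept minimal. */
static void
my_check_init_lifecycle(void) {
	cache_bin_t bin;
	memset(&bin, 0, sizeof(bin));
	/* Zeroed storage means cache_bin_init() has not run yet. */
	assert(cache_bin_still_zero_initialized(&bin));

	cache_bin_info_t info;
	cache_bin_info_init(&info, /* ncached_max */ 8);

	size_t size, alignment;
	cache_bin_info_compute_alloc(&info, /* ninfos */ 1, &size, &alignment);
	void *mem = mallocx(size, MALLOCX_ALIGN(alignment));
	assert(mem != NULL);

	size_t cur_offset = 0;
	cache_bin_preincrement(&info, 1, mem, &cur_offset);
	cache_bin_init(&bin, &info, mem, &cur_offset);
	cache_bin_postincrement(&info, 1, mem, &cur_offset);

	/* The bin has now been through cache_bin_init(). */
	assert(!cache_bin_still_zero_initialized(&bin));

	/* The bin points into mem; neither is used past this point. */
	dallocx(mem, 0);
}
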
--- include/jemalloc/internal/cache_bin.h | 7 +++++++ include/jemalloc/internal/jemalloc_internal_inlines_a.h | 4 ++-- src/cache_bin.c | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 42504ed..461b20b 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -354,4 +354,11 @@ void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); +/* + * If a cache bin was zero initialized (either because it lives in static or + * thread-local storage, or was memset to 0), this function indicates whether or + * not cache_bin_init was called on it. + */ +bool cache_bin_still_zero_initialized(cache_bin_t *bin); + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index f079e85..cc5e359 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) { if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ assert(tsd_tcachep_get(tsd)->arena == NULL || - tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr - != NULL); + !cache_bin_still_zero_initialized( + tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); return true; } diff --git a/src/cache_bin.c b/src/cache_bin.c index 260c1b7..94f3b32 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -102,3 +102,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); } + +bool +cache_bin_still_zero_initialized(cache_bin_t *bin) { + return bin->cur_ptr.ptr == NULL; +} -- cgit v0.12 From d498a4bb08f1220c089b2c2c06c26b5ff937e30c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:14:19 -0800 Subject: Cache bin: Add an emptiness assertion. --- include/jemalloc/internal/cache_bin.h | 7 +++++++ src/tcache.c | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 461b20b..cc72af6 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -146,6 +146,13 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { return ret; } +static inline void +cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { + assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_empty_position_get(bin, info) == bin->cur_ptr.ptr); +} + + /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { diff --git a/src/tcache.c b/src/tcache.c index a059ecc..bffc04f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -566,9 +566,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. 
*/ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0); - assert(cache_bin_empty_position_get(bin, &tcache_bin_info[0]) == - bin->cur_ptr.ptr); + cache_bin_assert_empty(bin, &tcache_bin_info[0]); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); -- cgit v0.12 From 0a2fcfac013e65a22548eeed09ebcaca1bdb63a3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:28:17 -0800 Subject: Tcache: Hold cache bin allocation explicitly. --- include/jemalloc/internal/tcache_structs.h | 6 +++++ src/tcache.c | 41 ++++++++++-------------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 38a82fe..48dbf0f 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -50,6 +50,12 @@ struct tcache_s { /* For small bins, whether has been refilled since last GC. */ bool bin_refilled[SC_NBINS]; /* + * The start of the allocation containing the dynamic allocation for + * either the cache bins alone, or the cache bin memory as well as this + * tcache_t. + */ + void *dyn_alloc; + /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up * letting us avoid touching an extra page if we don't have to. diff --git a/src/tcache.c b/src/tcache.c index bffc04f..f6b3776 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -425,10 +425,11 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { } static void -tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { +tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->next_gc_bin = 0; tcache->arena = NULL; + tcache->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); @@ -436,21 +437,21 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { unsigned i = 0; size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack, + cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); } for (; i < nhbins; i++) { cache_bin_t *bin = tcache_large_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); } - cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack, + cache_bin_postincrement(tcache_bin_info, nhbins, mem, &cur_offset); /* Sanity check that the whole stack is used. 
*/ assert(cur_offset == tcache_bin_alloc_size); @@ -464,13 +465,13 @@ tsd_tcache_data_init(tsd_t *tsd) { size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); - if (avail_array == NULL) { + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); + if (mem == NULL) { return true; } - tcache_init(tsd, tcache, avail_array); + tcache_init(tsd, tcache, mem); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -505,7 +506,7 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. */ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *); + size_t size = tcache_bin_alloc_size + sizeof(tcache_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); size = sz_sa2u(size, tcache_bin_alloc_alignment); @@ -515,15 +516,9 @@ tcache_create_explicit(tsd_t *tsd) { if (mem == NULL) { return NULL; } - void *avail_array = mem; - tcache_t *tcache = (void *)((uintptr_t)avail_array - + tcache_bin_alloc_size); - void **head_ptr = (void *)((uintptr_t)avail_array - + tcache_bin_alloc_size + sizeof(tcache_t)); - tcache_init(tsd, tcache, avail_array); - *head_ptr = mem; + tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); + tcache_init(tsd, tcache, mem); - tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); return tcache; @@ -564,18 +559,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_arena_dissociate(tsd_tsdn(tsd), tcache); if (tsd_tcache) { - /* Release the avail array for the TSD embedded auto tcache. */ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); cache_bin_assert_empty(bin, &tcache_bin_info[0]); - void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - - tcache_bin_info[0].stack_size); - idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); - } else { - /* See the comment at the top of tcache_create_explicit. */ - void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t)); - /* Release both the tcache struct and avail array. */ - idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true); } + idalloctm(tsd_tsdn(tsd), tcache->dyn_alloc, NULL, NULL, true, true); /* * The deallocation and tcache flush above may not trigger decay since -- cgit v0.12 From fef0b1ffe4d1b92a38727449c802e24294284524 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:40:31 -0800 Subject: Cache bin: Remove last internals accesses. 
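
A hedged sketch of the rule this change enforces (the helper below is hypothetical and not part of the patch): code outside the cache_bin module should query bin state only through accessors such as cache_bin_still_zero_initialized() and cache_bin_ncached_get(), never by reading fields like cur_ptr directly.

/* Hypothetical consumer that relies only on the cache_bin accessors. */
static bool
my_bin_has_cached_items(cache_bin_t *bin, cache_bin_info_t *info) {
	if (cache_bin_still_zero_initialized(bin)) {
		/* cache_bin_init() has not run; nothing is cached. */
		return false;
	}
	return cache_bin_ncached_get(bin, info) > 0;
}
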
--- src/tcache.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index f6b3776..e963223 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -461,7 +461,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); + assert(cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); @@ -588,18 +589,23 @@ tcache_cleanup(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); - if (config_debug) { - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr - == NULL); - } + assert(cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); return; } assert(tsd_tcache_enabled_get(tsd)); - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL); + assert(!cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); tcache_destroy(tsd, tcache, true); if (config_debug) { - tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL; + /* + * For debug testing only, we want to pretend we're still in the + * zero-initialized state. + */ + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); + memset(tcache->bins_large, 0, + sizeof(cache_bin_t) * (nhbins - SC_NBINS)); } } -- cgit v0.12 From 397da038656589cb3a263d1715ae27f90f6b30d1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Mar 2020 18:32:36 -0800 Subject: Cache bin: rewrite to track more state. With this, we track all of the empty, full, and low water states together. This simplifies a lot of the tracking logic, since we now don't need the cache_bin_info_t for state queries (except for some debugging). --- include/jemalloc/internal/cache_bin.h | 222 +++++++++++++++++----------------- src/cache_bin.c | 36 ++---- 2 files changed, 126 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cc72af6..0fb0842 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,67 +35,53 @@ struct cache_bin_stats_s { */ typedef struct cache_bin_info_s cache_bin_info_t; struct cache_bin_info_s { - /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */ - cache_bin_sz_t stack_size; + cache_bin_sz_t ncached_max; }; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { /* - * The cache bin stack is represented using 3 pointers: cur_ptr, - * low_water and full, optimized for the fast path efficiency. - * - * low addr ==> high addr - * |----|----|----|item1|item2|.....................|itemN| - * full cur empty - * (ncached == N; full + ncached_max == empty) + * The stack grows down. Whenever the bin is nonempty, the head points + * to an array entry containing a valid allocation. When it is empty, + * the head points to one element past the owned array. + */ + void **stack_head; + + /* + * The low bits of the address of the first item in the stack that + * hasn't been used since the last GC, to track the low water mark (min + * # of cached items). * - * Data directly stored: - * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr. - * 2) full points to the top of the stack (i.e. ncached == ncached_max), - * which is compared against on free_fastpath to check "is_full". 
- * 3) low_water indicates a low water mark of ncached. - * Range of low_water is [cur, empty], i.e. values of [ncached, 0]. + * Since the stack grows down, this is a higher address than + * low_bits_full. + */ + uint16_t low_bits_low_water; + + /* + * The low bits of the value that stack_head will take on when the array + * is full. (But remember that stack_head always points to a valid item + * when the array is nonempty -- this is in the array). * - * The empty position (ncached == 0) is derived via full + ncached_max - * and not accessed in the common case (guarded behind low_water). + * Recall that since the stack grows down, this is the lowest address in + * the array. + */ + uint16_t low_bits_full; + + /* + * The low bits of the value that stack_head will take on when the array + * is empty. * - * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by - * omitting the high 32 bits. Overflow of the half pointers is avoided - * when allocating / initializing the stack space. As a result, - * cur_ptr.lowbits can be safely used for pointer comparisons. + * The stack grows down -- this is one past the highest address in the + * array. */ - union { - void **ptr; - struct { - /* highbits never accessed directly. */ -#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN)) - uint32_t __highbits; -#endif - uint32_t lowbits; -#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - uint32_t __highbits; -#endif - }; - } cur_ptr; + uint16_t low_bits_empty; + /* * cur_ptr and stats are both modified frequently. Let's keep them * close so that they have a higher chance of being on the same * cacheline, thus less write-backs. */ cache_bin_stats_t tstats; - /* - * Points to the first item that hasn't been used since last GC, to - * track the low water mark (min # of cached). - */ - uint32_t low_water_position; - /* - * Points to the position when the cache is full. - * - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations (i.e. cur_ptr++). - */ - uint32_t full_position; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -118,30 +104,51 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t cache_bin_info_ncached_max(cache_bin_info_t *info) { - return info->stack_size / sizeof(void *); + return info->ncached_max; } +/* + * Asserts that the pointer associated with earlier is <= the one associated + * with later. + */ +static inline void +cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + if (earlier > later) { + assert(bin->low_bits_full > bin->low_bits_empty); + } +} + +/* + * Internal -- does difference calculations that handle wraparound correctly. + * Earlier must be associated with the position earlier in memory. 
+ */ +static inline uint16_t +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + cache_bin_assert_earlier(bin, earlier, later); + return later - earlier; +} + + static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = (cache_bin_sz_t)((info->stack_size + - bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); + cache_bin_sz_t diff = cache_bin_diff(bin, + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + cache_bin_sz_t n = diff / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); - assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); + assert(n == 0 || *(bin->stack_head) != NULL); return n; } static inline void ** cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, info); - /* Low bits overflow disallowed when allocating the space. */ - assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); + cache_bin_sz_t diff = cache_bin_diff(bin, + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; + void **ret = (void **)empty_bits; - /* Can also be computed via (full_position + ncached_max) | highbits. */ - uintptr_t lowbits = bin->full_position + info->stack_size; - uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & - ~(((uint64_t)1 << 32) - 1); - assert(ret == (void **)(lowbits | highbits)); + assert(ret >= bin->stack_head); return ret; } @@ -149,20 +156,29 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == bin->cur_ptr.ptr); + assert(cache_bin_empty_position_get(bin, info) == bin->stack_head); } +/* + * Get low water, but without any of the correctness checking we do for the + * caller-usable version, if we are temporarily breaking invariants (like + * ncached >= low_water during flush). + */ +static inline cache_bin_sz_t +cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { + return cache_bin_diff(bin, bin->low_bits_low_water, + bin->low_bits_empty) / sizeof(void *); +} /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - cache_bin_sz_t low_water = ncached_max - - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / - sizeof(void *)); - assert(low_water <= ncached_max); + cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin, info); + assert(low_water <= cache_bin_info_ncached_max(info)); assert(low_water <= cache_bin_ncached_get(bin, info)); - assert(bin->low_water_position >= bin->cur_ptr.lowbits); + + cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, + bin->low_bits_low_water); return low_water; } @@ -173,20 +189,7 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void cache_bin_low_water_set(cache_bin_t *bin) { - bin->low_water_position = bin->cur_ptr.lowbits; -} - -/* - * This is an internal implementation detail -- users should only affect ncached - * via single-item pushes or batch fills. 
- */ -static inline void -cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_sz_t n) { - bin->cur_ptr.lowbits = bin->full_position + info->stack_size - - n * sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); - assert(n == 0 || *bin->cur_ptr.ptr != NULL); + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } static inline void @@ -198,38 +201,35 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, - bool *success, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, + const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. */ - void *ret = *(bin->cur_ptr.ptr++); + void *ret = *bin->stack_head; + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + void **new_head = bin->stack_head + 1; /* - * Check for both bin->ncached == 0 and ncached < low_water in a single - * branch. When adjust_low_water is true, this also avoids accessing - * the cache_bin_info_t (which is on a separate cacheline / page) in - * the common case. + * Note that the low water mark is at most empty; if we pass this check, + * we know we're non-empty. */ - if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { + if (unlikely(low_bits == bin->low_bits_low_water)) { if (adjust_low_water) { - uint32_t empty_position = bin->full_position + - info->stack_size; - if (unlikely(bin->cur_ptr.lowbits > empty_position)) { - /* Over-allocated; revert. */ - bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == empty_position); + if (unlikely(low_bits == bin->low_bits_empty)) { *success = false; return NULL; } - bin->low_water_position = bin->cur_ptr.lowbits; + /* Overflow should be impossible. */ + assert(bin->low_bits_low_water + < (uint16_t)(uintptr_t)new_head); + bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; } else { - bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; return NULL; } } + bin->stack_head = new_head; /* * success (instead of ret) should be checked upon the return of this @@ -246,22 +246,27 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, NULL, success, false); + return cache_bin_alloc_easy_impl(bin, success, false); } JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { - return cache_bin_alloc_easy_impl(bin, info, success, true); + /* We don't use info now, but we may want to in the future. 
*/ + (void)info; + return cache_bin_alloc_easy_impl(bin, success, true); } JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { - if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + if (unlikely(low_bits == bin->low_bits_full)) { return false; } - *(--bin->cur_ptr.ptr) = ptr; - assert(bin->cur_ptr.lowbits >= bin->full_position); + bin->stack_head--; + *bin->stack_head = ptr; + cache_bin_assert_earlier(bin, bin->low_bits_full, + (uint16_t)(uintptr_t)bin->stack_head); return true; } @@ -279,8 +284,8 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; assert(cache_bin_ncached_get(bin, info) == 0); + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; } /* @@ -292,12 +297,12 @@ static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { assert(cache_bin_ncached_get(bin, info) == 0); + void **empty_position = cache_bin_empty_position_get(bin, info); if (nfilled < arr->n) { - void **empty_position = cache_bin_empty_position_get(bin, info); memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); } - cache_bin_ncached_set(bin, info, nfilled); + bin->stack_head = empty_position - nfilled; } static inline void @@ -326,11 +331,12 @@ static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; - memmove(bin->cur_ptr.ptr + nflushed, bin->cur_ptr.ptr, + memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); - cache_bin_ncached_set(bin, info, rem); - if (bin->cur_ptr.lowbits > bin->low_water_position) { - bin->low_water_position = bin->cur_ptr.lowbits; + bin->stack_head = bin->stack_head + nflushed; + if (cache_bin_ncached_get(bin, info) + < cache_bin_low_water_get_internal(bin, info)) { + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } } diff --git a/src/cache_bin.c b/src/cache_bin.c index 94f3b32..51b8749 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -8,7 +8,7 @@ cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); - info->stack_size = (cache_bin_sz_t)stack_size; + info->ncached_max = (cache_bin_sz_t)ncached_max; } void @@ -23,23 +23,14 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { - *size += infos[i].stack_size; + *size += infos[i].ncached_max * sizeof(void *); } /* - * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * Align to at least PAGE, to minimize the # of TLBs needed by the * smaller sizes; also helps if the larger sizes don't get used at all. - * 2) On 32-bit the pointers won't be compressed; use minimal alignment. */ - if (LG_SIZEOF_PTR < 3 || *size < PAGE) { - *alignment = PAGE; - } else { - /* - * Align pow2 to avoid overflow the cache bin compressed - * pointers. 
- */ - *alignment = pow2_ceil_zu(*size); - } + *alignment = PAGE; } void @@ -53,10 +44,6 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, cache_bin_info_compute_alloc(infos, ninfos, &computed_size, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); - - /* And that alignment should disallow overflow. */ - uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size); - assert((uint32_t)(uintptr_t)alloc < lowbits); } /* * Leave a noticeable mark pattern on the boundaries, in case a bug @@ -81,7 +68,6 @@ cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset) { - assert(sizeof(bin->cur_ptr) == sizeof(void *)); /* * The full_position points to the lowest available space. Allocations * will access the slots toward higher addresses (for the benefit of @@ -89,21 +75,23 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, */ void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); void *full_position = stack_cur; - uint32_t bin_stack_size = info->stack_size; + uint16_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); /* Init to the empty position. */ - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)full_position; - assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + bin->stack_head = (void **)empty_position; + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; + bin->low_bits_full = (uint16_t)(uintptr_t)full_position; + bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + assert(cache_bin_diff(bin, bin->low_bits_full, + (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); } bool cache_bin_still_zero_initialized(cache_bin_t *bin) { - return bin->cur_ptr.ptr == NULL; + return bin->stack_head == NULL; } -- cgit v0.12 From d701a085c29df6f6afc9a0b15c4732c8662fe80c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Mar 2020 08:58:42 -0800 Subject: Fast path: allow low-water mark changes. This lets us put more allocations on an "almost as fast" path after a flush. This results in around a 4% reduction in malloc cycles in prod workloads (corresponding to about a 0.1% reduction in overall cycles). --- include/jemalloc/internal/cache_bin.h | 77 +++++++++++++++--------------- include/jemalloc/internal/tcache_inlines.h | 6 +-- src/jemalloc.c | 34 +++++++++---- src/tcache.c | 3 +- test/unit/cache_bin.c | 26 +++++----- 5 files changed, 80 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0fb0842..f029704 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -46,6 +46,12 @@ struct cache_bin_s { * the head points to one element past the owned array. */ void **stack_head; + /* + * cur_ptr and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. 
+ */ + cache_bin_stats_t tstats; /* * The low bits of the address of the first item in the stack that @@ -76,12 +82,6 @@ struct cache_bin_s { */ uint16_t low_bits_empty; - /* - * cur_ptr and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. - */ - cache_bin_stats_t tstats; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -201,8 +201,15 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, - const bool adjust_low_water) { +cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { + /* + * success (instead of ret) should be checked upon the return of this + * function. We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -210,50 +217,44 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, void *ret = *bin->stack_head; uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; void **new_head = bin->stack_head + 1; + /* * Note that the low water mark is at most empty; if we pass this check, * we know we're non-empty. */ - if (unlikely(low_bits == bin->low_bits_low_water)) { - if (adjust_low_water) { - if (unlikely(low_bits == bin->low_bits_empty)) { - *success = false; - return NULL; - } - /* Overflow should be impossible. */ - assert(bin->low_bits_low_water - < (uint16_t)(uintptr_t)new_head); - bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; - } else { - *success = false; - return NULL; - } + if (likely(low_bits != bin->low_bits_low_water)) { + bin->stack_head = new_head; + *success = true; + return ret; + } + if (!adjust_low_water) { + *success = false; + return NULL; } - bin->stack_head = new_head; - /* - * success (instead of ret) should be checked upon the return of this - * function. We avoid checking (ret == NULL) because there is never a - * null stored on the avail stack (which is unknown to the compiler), - * and eagerly checking ret would cause pipeline stall (waiting for the - * cacheline). + * In the fast-path case where we call alloc_easy and then alloc, the + * previous checking and computation is optimized away -- we didn't + * actually commit any of our operations. */ - *success = true; - - return ret; + if (likely(low_bits != bin->low_bits_empty)) { + bin->stack_head = new_head; + bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; + *success = true; + return ret; + } + *success = false; + return NULL; } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, success, false); + return cache_bin_alloc_impl(bin, success, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { - /* We don't use info now, but we may want to in the future. 
*/ - (void)info; - return cache_bin_alloc_easy_impl(bin, success, true); +cache_bin_alloc(cache_bin_t *bin, bool *success) { + return cache_bin_alloc_impl(bin, success, true); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2d31ad0..3b78ed2 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +79,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 12b4f6c..758e324 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2377,6 +2377,17 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. */ +JEMALLOC_ALWAYS_INLINE void +fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, + cache_bin_t *bin, void *ret) { + thread_allocated_set(tsd, allocated_after); + if (config_stats) { + bin->tstats.nrequests++; + } + + LOG("core.malloc.exit", "result: %p", ret); +} + /* * malloc() fastpath. * @@ -2451,17 +2462,22 @@ je_malloc(size_t size) { tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); + void *ret; + /* + * We split up the code this way so that redundant low-water + * computation doesn't happen on the (more common) case in which we + * don't touch the low water mark. The compiler won't do this + * duplication on its own. 
+ */ + ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { - thread_allocated_set(tsd, allocated_after); - if (config_stats) { - bin->tstats.nrequests++; - } - - LOG("core.malloc.exit", "result: %p", ret); - - /* Fastpath success */ + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + ret = cache_bin_alloc(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } diff --git a/src/tcache.c b/src/tcache.c index e963223..9afc006 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -104,8 +104,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind], - tcache_success); + ret = cache_bin_alloc(tbin, tcache_success); return ret; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 2623b38..cbd8ce0 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -17,7 +17,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { - ptr = cache_bin_alloc_easy(bin, info, &success); + ptr = cache_bin_alloc(bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, (void *)&ptrs[i], "Should pop in order filled"); @@ -48,7 +48,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); while (cache_bin_ncached_get(bin, info) > 0) { - cache_bin_alloc_easy(bin, info, &success); + cache_bin_alloc(bin, &success); } } @@ -78,11 +78,11 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); @@ -112,10 +112,10 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max - i, ""); /* - * This should fail -- the reduced version can't change low - * water. + * This should fail -- the easy variant can't change the low + * water mark. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); expect_true(cache_bin_low_water_get(&bin, &info) @@ -124,7 +124,7 @@ TEST_BEGIN(test_cache_bin) { == ncached_max - i, ""); /* This should succeed, though. */ - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); @@ -135,10 +135,10 @@ TEST_BEGIN(test_cache_bin) { } /* Now we're empty -- all alloc attempts should fail. 
*/ expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -156,18 +156,18 @@ TEST_BEGIN(test_cache_bin) { * Size is bigger than low water -- the reduced version should * succeed. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[i], ""); } /* But now, we've hit low-water. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, ""); expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ while (cache_bin_ncached_get(&bin, &info)) { - cache_bin_alloc_easy(&bin, &info, &success); + cache_bin_alloc(&bin, &success); expect_true(success, ""); } -- cgit v0.12 From 92485032b2e9184cada5a30e3df389fe164fbb4d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 7 Mar 2020 15:56:49 -0800 Subject: Cache bin: improve comments. --- include/jemalloc/internal/cache_bin.h | 95 +++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index f029704..5a772bf 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -20,6 +20,11 @@ */ typedef uint16_t cache_bin_sz_t; +/* + * This lives inside the cache_bin (for locality reasons), and is initialized + * alongside it, but is otherwise not modified by any cache bin operations. + * It's logically public and maintained by its callers. + */ typedef struct cache_bin_stats_s cache_bin_stats_t; struct cache_bin_stats_s { /* @@ -38,6 +43,9 @@ struct cache_bin_info_s { cache_bin_sz_t ncached_max; }; +/* + * Responsible for caching allocations associated with a single size. + */ typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { /* @@ -84,6 +92,12 @@ struct cache_bin_s { }; +/* + * The cache_bins live inside the tcache, but the arena (by design) isn't + * supposed to know much about tcache internals. To let the arena iterate over + * associated bins, we keep (with the tcache) a linked list of + * cache_bin_array_descriptor_ts that tell the arena how to find the bins. + */ typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; struct cache_bin_array_descriptor_s { /* @@ -96,10 +110,13 @@ struct cache_bin_array_descriptor_s { cache_bin_t *bins_large; }; -/* - * None of the cache_bin_*_get / _set functions is used on the fast path, which - * relies on pointer comparisons to determine if the cache is full / empty. - */ +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t @@ -108,6 +125,8 @@ cache_bin_info_ncached_max(cache_bin_info_t *info) { } /* + * Internal. + * * Asserts that the pointer associated with earlier is <= the one associated * with later. 
*/ @@ -119,8 +138,10 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { } /* - * Internal -- does difference calculations that handle wraparound correctly. - * Earlier must be associated with the position earlier in memory. + * Internal. + * + * Does difference calculations that handle wraparound correctly. Earlier must + * be associated with the position earlier in memory. */ static inline uint16_t cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { @@ -128,7 +149,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } - +/* Number of items currently cached in the bin. */ static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t diff = cache_bin_diff(bin, @@ -141,6 +162,11 @@ cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { return n; } +/* + * Internal. + * + * A pointer to the position one past the end of the backing array. + */ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t diff = cache_bin_diff(bin, @@ -153,6 +179,10 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { return ret; } +/* + * As the name implies. This is important since it's not correct to try to + * batch fill a nonempty cache bin. + */ static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); @@ -192,14 +222,6 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } -static inline void -cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins_small, cache_bin_t *bins_large) { - ql_elm_new(descriptor, link); - descriptor->bins_small = bins_small; - descriptor->bins_large = bins_large; -} - JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* @@ -246,17 +268,27 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { return NULL; } +/* + * Allocate an item out of the bin, failing if we're at the low-water mark. + */ JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ return cache_bin_alloc_impl(bin, success, false); } +/* + * Allocate an item out of the bin, even if we're currently at the low-water + * mark (and failing only if the bin is empty). + */ JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc(cache_bin_t *bin, bool *success) { return cache_bin_alloc_impl(bin, success, true); } +/* + * Free an object into the given bin. Fails only if the bin is full. + */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; @@ -272,16 +304,46 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } +/** + * Filling and flushing are done in batch, on arrays of void *s. For filling, + * the arrays go forward, and can be accessed with ordinary array arithmetic. + * For flushing, we work from the end backwards, and so need to use special + * accessors that invert the usual ordering. + * + * This is important for maintaining first-fit; the arena code fills with + * earliest objects first, and so those are the ones we should return first for + * cache_bin_alloc calls. 
When flushing, we should flush the objects that we + * wish to return later; those at the end of the array. This is better for the + * first-fit heuristic as well as for cache locality; the most recently freed + * objects are the ones most likely to still be in cache. + * + * This all sounds very hand-wavey and theoretical, but reverting the ordering + * on one or the other pathway leads to measurable slowdowns. + */ + typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { cache_bin_sz_t n; void **ptr; }; +/* + * Declare a cache_bin_ptr_array_t sufficient for nval items. + * + * In the current implementation, this could be just part of a + * cache_bin_ptr_array_init_... call, since we reuse the cache bin stack memory. + * Indirecting behind a macro, though, means experimenting with linked-list + * representations is easy (since they'll require an alloca in the calling + * frame). + */ #define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ cache_bin_ptr_array_t name; \ name.n = (nval) +/* + * Start a fill. The bin must be empty, and This must be followed by a + * finish_fill call before doing any alloc/dalloc operations on the bin. + */ static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { @@ -306,6 +368,7 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, bin->stack_head = empty_position - nfilled; } +/* Same deal, but with flush. */ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { @@ -316,7 +379,7 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, /* * These accessors are used by the flush pathways -- they reverse ordinary array - * ordering. + * ordering. See the note above. */ JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { -- cgit v0.12 From e732344ef18fa295c1ca77ffc40760f5873db1b8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 6 Mar 2020 12:41:16 -0800 Subject: Inspect test: Reduce checks when profiling is on. Profiled small allocations don't live in bins, which is contrary to the test expectation. --- test/unit/inspect.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 384b1ad..fe59e59 100644 --- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -87,7 +87,13 @@ TEST_BEGIN(test_query) { "Extent size should be at least allocation size"); expect_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { + + /* + * We don't do much bin checking if prof is on, since profiling + * can produce extents that are for small size classes but not + * slabs, which interferes with things like region counts. 
+ */ + if (!opt_prof && sz <= SC_SMALL_MAXCLASS) { expect_zu_le(NFREE_READ(out), NREGS_READ(out), "Extent free count exceeded region count"); expect_zu_le(NREGS_READ(out), SIZE_READ(out), @@ -97,6 +103,7 @@ TEST_BEGIN(test_query) { expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); + if (config_stats) { expect_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), @@ -125,7 +132,7 @@ TEST_BEGIN(test_query) { "Extent utilized count exceeded " "bin utilized count"); } - } else { + } else if (sz > SC_SMALL_MAXCLASS) { expect_zu_eq(NFREE_READ(out), 0, "Extent free count should be zero"); expect_zu_eq(NREGS_READ(out), 1, @@ -214,14 +221,18 @@ TEST_BEGIN(test_batch) { "Extent size should be at least allocation size"); expect_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { + /* + * See the corresponding comment in test_query; profiling breaks + * our slab count expectations. + */ + if (sz <= SC_SMALL_MAXCLASS && !opt_prof) { expect_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), "Extent free count exceeded region count"); expect_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), "Extent region count exceeded size"); expect_zu_ne(NREGS_READ(out, 0), 0, "Extent region count must be positive"); - } else { + } else if (sz > SC_SMALL_MAXCLASS) { expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); expect_zu_eq(NREGS_READ(out, 0), 1, -- cgit v0.12 From 734109d9c28beb2da12af34e1d2e4324e4895191 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 16:09:10 -0800 Subject: Edata cache: add a unit test. --- Makefile.in | 1 + test/unit/edata_cache.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 test/unit/edata_cache.c diff --git a/Makefile.in b/Makefile.in index b53846d..7eca2f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -196,6 +196,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c new file mode 100644 index 0000000..638e229 --- /dev/null +++ b/test/unit/edata_cache.c @@ -0,0 +1,54 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/edata_cache.h" + +static void +test_edata_cache_init(edata_cache_t *edata_cache) { + base_t *base = base_new(TSDN_NULL, /* ind */ 1, + &ehooks_default_extent_hooks); + assert_ptr_not_null(base, ""); + bool err = edata_cache_init(edata_cache, base); + assert_false(err, ""); +} + +static void +test_edata_cache_destroy(edata_cache_t *edata_cache) { + base_delete(TSDN_NULL, edata_cache->base); +} + +TEST_BEGIN(test_edata_cache) { + edata_cache_t edc; + test_edata_cache_init(&edc); + + /* Get one */ + edata_t *ed1 = edata_cache_get(TSDN_NULL, &edc); + assert_ptr_not_null(ed1, ""); + + /* Cache should be empty */ + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + /* Get another */ + edata_t *ed2 = edata_cache_get(TSDN_NULL, &edc); + assert_ptr_not_null(ed2, ""); + + /* Still empty */ + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + /* Put one back, and the cache should now have one item */ + edata_cache_put(TSDN_NULL, &edc, ed1); + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 1, ""); + + /* Reallocating should 
reuse the item, and leave an empty cache. */ + edata_t *ed1_again = edata_cache_get(TSDN_NULL, &edc); + assert_ptr_eq(ed1, ed1_again, ""); + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + test_edata_cache_destroy(&edc); +} +TEST_END + +int +main(void) { + return test( + test_edata_cache); +} -- cgit v0.12 From 99b1291d1760ad164346073b35ac03ce2eb35e68 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 17:21:40 -0800 Subject: Edata cache: add edata_cache_small_t. This can be used to amortize the synchronization costs of edata_cache accesses. --- include/jemalloc/internal/edata_cache.h | 28 +++++++++++++++ src/edata_cache.c | 60 ++++++++++++++++++++++++++++++-- test/unit/edata_cache.c | 61 ++++++++++++++++++++++++++------- 3 files changed, 134 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 73ac7af..620360d 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -25,4 +25,32 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); +typedef struct edata_cache_small_s edata_cache_small_t; +struct edata_cache_small_s { + edata_list_t list; + size_t count; + edata_cache_t *fallback; +}; + +/* + * An edata_cache_small is like an edata_cache, but it relies on external + * synchronization and avoids first-fit strategies. You can call "prepare" to + * acquire at least num edata_t objects, and then "finish" to flush all + * excess ones back to their fallback edata_cache_t. Once they have been + * acquired, they can be allocated without failing (and in fact, this is + * required -- it's not permitted to attempt to get an edata_t without first + * preparing for it). + */ + +void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); + +/* Returns whether or not an error occurred. 
*/ +bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num); +edata_t *edata_cache_small_get(edata_cache_small_t *ecs); + +void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata); +void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num); + #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/src/edata_cache.c b/src/edata_cache.c index 1af7b96..b62972a 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -40,14 +40,68 @@ edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_unlock(tsdn, &edata_cache->mtx); } -void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_prefork(tsdn, &edata_cache->mtx); } -void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_postfork_parent(tsdn, &edata_cache->mtx); } -void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_postfork_child(tsdn, &edata_cache->mtx); } + +void +edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { + edata_list_init(&ecs->list); + ecs->count = 0; + ecs->fallback = fallback; +} + +edata_t * +edata_cache_small_get(edata_cache_small_t *ecs) { + assert(ecs->count > 0); + edata_t *edata = edata_list_first(&ecs->list); + assert(edata != NULL); + edata_list_remove(&ecs->list, edata); + ecs->count--; + return edata; +} + +void +edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { + assert(edata != NULL); + edata_list_append(&ecs->list, edata); + ecs->count++; +} + +bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num) { + while (ecs->count < num) { + /* + * Obviously, we can be smarter here and batch the locking that + * happens inside of edata_cache_get. But for now, something + * quick-and-dirty is fine. + */ + edata_t *edata = edata_cache_get(tsdn, ecs->fallback); + if (edata == NULL) { + return true; + } + ql_elm_new(edata, ql_link); + edata_cache_small_put(ecs, edata); + } + return false; +} + +void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num) { + while (ecs->count > num) { + /* Same deal here -- we should be batching. 
*/ + edata_t *edata = edata_cache_small_get(ecs); + edata_cache_put(tsdn, ecs->fallback, edata); + } +} diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 638e229..22c9dcb 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -17,38 +17,75 @@ test_edata_cache_destroy(edata_cache_t *edata_cache) { } TEST_BEGIN(test_edata_cache) { - edata_cache_t edc; - test_edata_cache_init(&edc); + edata_cache_t ec; + test_edata_cache_init(&ec); /* Get one */ - edata_t *ed1 = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed1 = edata_cache_get(TSDN_NULL, &ec); assert_ptr_not_null(ed1, ""); /* Cache should be empty */ - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* Get another */ - edata_t *ed2 = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed2 = edata_cache_get(TSDN_NULL, &ec); assert_ptr_not_null(ed2, ""); /* Still empty */ - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* Put one back, and the cache should now have one item */ - edata_cache_put(TSDN_NULL, &edc, ed1); - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 1, ""); + edata_cache_put(TSDN_NULL, &ec, ed1); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); /* Reallocating should reuse the item, and leave an empty cache. */ - edata_t *ed1_again = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed1_again = edata_cache_get(TSDN_NULL, &ec); assert_ptr_eq(ed1, ed1_again, ""); - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - test_edata_cache_destroy(&edc); + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_small) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2); + assert_false(err, ""); + assert_zu_eq(ecs.count, 2, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_t *ed1 = edata_cache_small_get(&ecs); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_t *ed2 = edata_cache_small_get(&ecs); + assert_zu_eq(ecs.count, 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_put(&ecs, ed1); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_put(&ecs, ed2); + assert_zu_eq(ecs.count, 2, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_finish(TSDN_NULL, &ecs, 1); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); + + test_edata_cache_destroy(&ec); } TEST_END int main(void) { return test( - test_edata_cache); + test_edata_cache, + test_edata_cache_small); } -- cgit v0.12 From 0dcd576600b7ad1b4a142eb993e4f7639ccc638c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 17:26:32 -0800 Subject: Edata cache: atomic fetch-add -> load-store. The modifications to count are protected by a mutex; there's no need to use the more costly version. 
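
Both the increment in edata_cache_put() and the decrement in
edata_cache_get() happen with edata_cache->mtx held, so a relaxed load
followed by a relaxed store is equivalent to the atomic RMW here while
avoiding its cost.  The count itself stays atomic so that unlocked readers
(e.g. the relaxed loads in the unit test) remain well-defined.  A minimal
sketch of the resulting put-side pattern -- it simply mirrors the hunk below:

    malloc_mutex_lock(tsdn, &edata_cache->mtx);
    edata_avail_insert(&edata_cache->avail, edata);
    /* Writers are serialized by mtx; no fetch-add needed. */
    size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED);
    atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED);
    malloc_mutex_unlock(tsdn, &edata_cache->mtx);
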
--- src/edata_cache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/edata_cache.c b/src/edata_cache.c index b62972a..4601f33 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -27,7 +27,8 @@ edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); - atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); + atomic_store_zu(&edata_cache->count, count - 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &edata_cache->mtx); return edata; } @@ -36,7 +37,8 @@ void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_avail_insert(&edata_cache->avail, edata); - atomic_fetch_add_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); + atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &edata_cache->mtx); } -- cgit v0.12 From 441d88d1c78ecc38a7ffad3f88ea50513dabc0f8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 9 Mar 2020 15:49:15 -0700 Subject: Rewrite profiling thread event --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 17 ++++-------- include/jemalloc/internal/thread_event.h | 7 +++++ src/jemalloc.c | 44 ++++++++++-------------------- src/prof.c | 17 +----------- src/thread_event.c | 12 +------- 6 files changed, 31 insertions(+), 68 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 0b6fecd..5a32754 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -53,7 +53,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 7e28d83..6498387 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { +prof_sample_should_skip(tsd_t *tsd, size_t usize) { cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(prof_sample_event_wait_get(tsd) > 0)) { + if (likely(!te_prof_sample_event_lookahead(tsd, usize))) { return true; } @@ -102,21 +102,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { return true; } - /* Compute new sample threshold. 
*/ - if (update) { - prof_sample_threshold_update(tsd); - } return !tdata->active; } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) { prof_tctx_t *ret; assert(usize == sz_s2u(usize)); - if (!prof_active || - likely(prof_sample_accum_update(tsd, usize, update))) { + if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { ret = prof_tctx_create(tsd); @@ -150,7 +145,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_accum_update(tsd, usize, true)) { + if (prof_sample_should_skip(tsd, usize)) { /* * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a @@ -158,7 +153,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * though its actual usize was insufficient to cross the * sample threshold. */ - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx); tctx = (prof_tctx_t *)(uintptr_t)1U; } } diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index d528c05..f9e2ba5 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -218,6 +218,13 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + return tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd) >= + tsd_prof_sample_event_wait_get(tsd); +} + JEMALLOC_ALWAYS_INLINE void te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { te_assert_invariants(tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 758e324..7a65db0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2177,8 +2177,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } - thread_alloc_event(tsd, usize); - /* * If dopts->alignment > 0, then ind is still 0, but usize was computed * in the previous if statement. Down the positive alignment path, @@ -2187,8 +2185,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { - prof_tctx_t *tctx = prof_alloc_prep( - tsd, usize, prof_active_get_unlocked(), true); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active); emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { @@ -2204,8 +2202,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - te_alloc_rollback(tsd, usize); - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx); goto label_oom; } prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx); @@ -2214,7 +2211,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - te_alloc_rollback(tsd, usize); goto label_oom; } } @@ -2223,6 +2219,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * Allocation has been done at this point. We still have some * post-allocation work to do though. 
*/ + + thread_alloc_event(tsd, usize); + assert(dopts->alignment == 0 || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); @@ -3132,7 +3131,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); + prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, @@ -3142,7 +3141,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx); return NULL; } @@ -3155,8 +3154,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ + assert(*usize >= isalloc(tsd_tsdn(tsd), p)); *usize = isalloc(tsd_tsdn(tsd), p); } + prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, old_usize, &old_prof_info); @@ -3214,11 +3215,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } - thread_alloc_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - te_alloc_rollback(tsd, usize); goto label_oom; } } else { @@ -3228,9 +3227,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); - thread_alloc_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); UTRACE(ptr, size, p); @@ -3416,9 +3415,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } - thread_alloc_event(tsd, usize_max); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active); size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3428,18 +3426,6 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } - if (usize <= usize_max) { - te_alloc_rollback(tsd, usize_max - usize); - } else { - /* - * For downsizing request, usize_max can be less than usize. - * We here further increase thread event counters so as to - * record the true usize, and then when the execution goes back - * to xallocx(), the entire usize will be rolled back if it's - * equal to the old usize. 
- */ - thread_alloc_event(tsd, usize - usize_max); - } /* * At this point we can still safely get the original profiling @@ -3452,9 +3438,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_t prof_info; if (usize == old_usize) { prof_info_get(tsd, ptr, alloc_ctx, &prof_info); - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx); } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + assert(usize <= usize_max); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info); } @@ -3516,7 +3503,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); - thread_alloc_event(tsd, usize); } /* @@ -3527,9 +3513,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { == old_edata); if (unlikely(usize == old_usize)) { - te_alloc_rollback(tsd, usize); goto label_not_resized; } + thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); if (config_fill && malloc_slow) { diff --git a/src/prof.c b/src/prof.c index 82f88a2..73e6d91 100644 --- a/src/prof.c +++ b/src/prof.c @@ -118,7 +118,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } void -prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { +prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); if (tsd_reentrancy_level_get(tsd) > 0) { @@ -126,21 +126,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { return; } - prof_tdata_t *tdata; - - if (updated) { - /* - * Compute a new sample threshold. This isn't very important in - * practice, because this function is rarely executed, so the - * potential for sample bias is minimal except in contrived - * programs. - */ - tdata = prof_tdata_get(tsd, true); - if (tdata != NULL) { - prof_sample_threshold_update(tsd); - } - } - if ((uintptr_t)tctx > (uintptr_t)1U) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; diff --git a/src/thread_event.c b/src/thread_event.c index dadace3..75208f0 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -78,17 +78,7 @@ te_prof_sample_event_handler(tsd_t *tsd) { if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { prof_idump(tsd_tsdn(tsd)); } - if (!prof_active_get_unlocked()) { - /* - * If prof_active is off, we reset prof_sample_event_wait to be - * the sample interval when it drops to 0, so that there won't - * be excessive routings to the slow path, and that when - * prof_active is turned on later, the counting for sampling - * can immediately resume as normal. 
- */ - te_prof_sample_event_update(tsd, - (uint64_t)(1 << lg_prof_sample)); - } + te_tsd_prof_sample_event_init(tsd); } static void -- cgit v0.12 From ba783b3a0ff6d47d56a76ed298a1aaa2515d12d4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Mar 2020 14:21:05 -0700 Subject: Remove prof -> thread_event dependency --- include/jemalloc/internal/prof_inlines_b.h | 15 +++++++-------- include/jemalloc/internal/thread_event.h | 1 + src/jemalloc.c | 16 +++++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 6498387..29d4020 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_should_skip(tsd_t *tsd, size_t usize) { +prof_sample_should_skip(tsd_t *tsd, bool sample_event) { cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(!te_prof_sample_event_lookahead(tsd, usize))) { + if (likely(!sample_event)) { return true; } @@ -106,12 +106,11 @@ prof_sample_should_skip(tsd_t *tsd, size_t usize) { } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) { +prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { prof_tctx_t *ret; - assert(usize == sz_s2u(usize)); - - if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) { + if (!prof_active || + likely(prof_sample_should_skip(tsd, sample_event))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { ret = prof_tctx_create(tsd); @@ -137,7 +136,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, - prof_info_t *old_prof_info) { + prof_info_t *old_prof_info, bool sample_event) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -145,7 +144,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_should_skip(tsd, usize)) { + if (prof_sample_should_skip(tsd, sample_event)) { /* * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index f9e2ba5..cef404b 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -220,6 +220,7 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + assert(usize == sz_s2u(usize)); return tsd_thread_allocated_get(tsd) + usize - tsd_thread_allocated_last_event_get(tsd) >= tsd_prof_sample_event_wait_get(tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7a65db0..8561ef4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2186,7 +2186,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. 
*/ if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, + sample_event); emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { @@ -3131,7 +3133,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, *usize); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, @@ -3158,8 +3161,9 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, *usize = isalloc(tsd_tsdn(tsd), p); } + sample_event = te_prof_sample_event_lookahead(tsd, *usize); prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, - old_usize, &old_prof_info); + old_usize, &old_prof_info, sample_event); return p; } @@ -3416,7 +3420,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } } bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3442,8 +3447,9 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); assert(usize <= usize_max); + sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, - old_usize, &prof_info); + old_usize, &prof_info, sample_event); } assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx); -- cgit v0.12 From a5780598b3963648e217c89872e98b40d3e7b4ea Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 9 Mar 2020 17:05:06 -0700 Subject: Remove thread_event_rollback() --- include/jemalloc/internal/thread_event.h | 1 - src/thread_event.c | 49 -------------------------------- test/unit/thread_event.c | 18 +----------- 3 files changed, 1 insertion(+), 67 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index cef404b..b05ff25 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,7 +33,6 @@ typedef struct te_ctx_s { void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); -void te_alloc_rollback(tsd_t *tsd, size_t diff); void te_event_update(tsd_t *tsd, bool alloc_event); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); diff --git a/src/thread_event.c b/src/thread_event.c index 75208f0..163ca3f 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -321,55 +321,6 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { } void -te_alloc_rollback(tsd_t *tsd, size_t diff) { - te_assert_invariants(tsd); - if (diff == 0U) { - return; - } - - /* Rollback happens only on alloc events. 
*/ - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, true); - - uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx); - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t thread_allocated_rollback = thread_allocated - diff; - te_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - - uint64_t last_event = te_ctx_last_event_get(&ctx); - /* Both subtractions are intentionally susceptible to underflow. */ - if (thread_allocated_rollback - last_event <= - thread_allocated - last_event) { - te_assert_invariants(tsd); - return; - } - - te_ctx_last_event_set(&ctx, thread_allocated_rollback); - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t wait_diff = last_event - thread_allocated_rollback; - assert(wait_diff <= diff); - -#define E(event, condition, alloc_event) \ - if (alloc_event == true && condition) { \ - uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > 0U) { \ - if (wait_diff > TE_MAX_START_WAIT - event_wait) {\ - event_wait = TE_MAX_START_WAIT; \ - } else { \ - event_wait += wait_diff; \ - } \ - assert(event_wait <= TE_MAX_START_WAIT); \ - event##_event_wait_set(tsd, event_wait); \ - } \ - } - - ITERATE_OVER_ALL_EVENTS -#undef E - te_event_update(tsd, true); -} - -void te_event_update(tsd_t *tsd, bool is_alloc) { te_ctx_t ctx; te_ctx_get(tsd, &ctx, is_alloc); diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 5501fa3..e0b88a9 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -27,24 +27,8 @@ TEST_BEGIN(test_next_event_fast) { } TEST_END -TEST_BEGIN(test_event_rollback) { - tsd_t *tsd = tsd_fetch(); - const uint64_t diff = TE_MAX_INTERVAL >> 2; - size_t count = 10; - uint64_t thread_allocated = thread_allocated_get(tsd); - while (count-- != 0) { - te_alloc_rollback(tsd, diff); - uint64_t thread_allocated_after = thread_allocated_get(tsd); - assert_u64_eq(thread_allocated - thread_allocated_after, diff, - "thread event counters are not properly rolled back"); - thread_allocated = thread_allocated_after; - } -} -TEST_END - int main(void) { return test( - test_next_event_fast, - test_event_rollback); + test_next_event_fast); } -- cgit v0.12 From 2e5899c1299125c17fc428026a364368ff1531ed Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 12:42:10 -0700 Subject: Stats: Fix tcache_bytes reporting. Previously, large allocations in tcaches would have their sizes reduced during stats estimation. Added a test, which fails before this change but passes now. This fixes a bug introduced in 593484661261c20f75557279931eb2d9ca165185, which was itself fixing a bug introduced in 9c0549007dcb64f4ff35d37390a9a6a8d3cea880. 
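
To make the indexing error concrete, here is a sketch of the corrected
accumulation loop in arena_stats_merge(), using the names that appear in
the diff below; the loop bound n_large_bins and the surrounding context
are schematic and shown only for illustration:

	/*
	 * descriptor->bins_large[i] caches objects of size class
	 * (i + SC_NBINS), so the byte estimate has to use the shifted
	 * index for the size as well, not just for the bin info lookup.
	 */
	for (szind_t i = 0; i < n_large_bins; i++) {	/* bound is schematic */
		cache_bin_t *tbin = &descriptor->bins_large[i];
		arena_stats_accum_zu(&astats->tcache_bytes,
		    cache_bin_ncached_get(tbin, &tcache_bin_info[i + SC_NBINS])
		    * sz_index2size(i + SC_NBINS));	/* was sz_index2size(i) */
	}
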
--- src/arena.c | 3 ++- test/unit/stats.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7f7c27f..d4b6979 100644 --- a/src/arena.c +++ b/src/arena.c @@ -207,7 +207,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, cache_bin_ncached_get(tbin, - &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i)); + &tcache_bin_info[i + SC_NBINS]) + * sz_index2size(i + SC_NBINS)); } } malloc_mutex_prof_read(tsdn, diff --git a/test/unit/stats.c b/test/unit/stats.c index f4ac154..20a32dd 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + TEST_BEGIN(test_stats_summary) { size_t sz, allocated, active, resident, mapped; int expected = config_stats ? 0 : ENOENT; @@ -361,6 +364,50 @@ TEST_BEGIN(test_stats_arenas_lextents) { } TEST_END +static void +test_tcache_bytes_for_usize(size_t usize) { + uint64_t epoch; + size_t tcache_bytes; + size_t sz = sizeof(tcache_bytes); + + void *ptr = mallocx(usize, 0); + + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + size_t tcache_bytes_before = tcache_bytes; + dallocx(ptr, 0); + + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + size_t tcache_bytes_after = tcache_bytes; + assert_zu_eq(tcache_bytes_after - tcache_bytes_before, + usize, "Incorrectly attributed a free"); +} + +TEST_BEGIN(test_stats_tcache_bytes_small) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS); + + test_tcache_bytes_for_usize(SC_SMALL_MAXCLASS); +} +TEST_END + +TEST_BEGIN(test_stats_tcache_bytes_large) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_LARGE_MINCLASS); + + test_tcache_bytes_for_usize(SC_LARGE_MINCLASS); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -370,5 +417,7 @@ main(void) { test_stats_arenas_small, test_stats_arenas_large, test_stats_arenas_bins, - test_stats_arenas_lextents); + test_stats_arenas_lextents, + test_stats_tcache_bytes_small, + test_stats_tcache_bytes_large); } -- cgit v0.12 From b30a5c2f9073b6f35f0023a443cd18ca406e972a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Mar 2020 10:00:50 -0700 Subject: Reorganize cpp APIs and suppress unused function warnings --- src/jemalloc_cpp.cpp | 82 +++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index c2110a1..6959b27 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -97,43 +97,6 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } -#if __cpp_aligned_new >= 201606 -template -JEMALLOC_ALWAYS_INLINE -void * -alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { - void *ptr = je_aligned_alloc(static_cast(alignment), size); 
- if (likely(ptr != nullptr)) { - return ptr; - } - - return handleOOM(size, IsNoExcept); -} -#endif // __cpp_aligned_new - -JEMALLOC_ALWAYS_INLINE -void -sizedDeleteImpl(void* ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); -} - -#if __cpp_aligned_new >= 201606 -JEMALLOC_ALWAYS_INLINE -void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { - if (config_debug) { - assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); - } - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); -} -#endif // __cpp_aligned_new - void * operator new(std::size_t size) { return newImpl(size); @@ -156,14 +119,21 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { #if __cpp_aligned_new >= 201606 +template +JEMALLOC_ALWAYS_INLINE void * -operator new(std::size_t size, std::align_val_t alignment) { - return alignedNewImpl(size, alignment); +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { + void *ptr = je_aligned_alloc(static_cast(alignment), size); + if (likely(ptr != nullptr)) { + return ptr; + } + + return handleOOM(size, IsNoExcept); } void * -operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { - return alignedNewImpl(size, alignment); +operator new(std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); } void * @@ -172,6 +142,11 @@ operator new[](std::size_t size, std::align_val_t alignment) { } void * +operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); +} + +void * operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { return alignedNewImpl(size, alignment); } @@ -199,6 +174,15 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { #if __cpp_sized_deallocation >= 201309 +JEMALLOC_ALWAYS_INLINE +void +sizedDeleteImpl(void* ptr, std::size_t size) noexcept { + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx_noflags(ptr, size); +} + void operator delete(void *ptr, std::size_t size) noexcept { sizedDeleteImpl(ptr, size); @@ -213,18 +197,30 @@ operator delete[](void *ptr, std::size_t size) noexcept { #if __cpp_aligned_new >= 201606 +JEMALLOC_ALWAYS_INLINE +void +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + if (config_debug) { + assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); + } + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); +} + void operator delete(void* ptr, std::align_val_t) noexcept { je_free(ptr); } void -operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete[](void* ptr, std::align_val_t) noexcept { je_free(ptr); } void -operator delete[](void* ptr, std::align_val_t) noexcept { +operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { je_free(ptr); } -- cgit v0.12 From ccdc70a5ce7b9dd723d947025f99006e7e78d17e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Mar 2020 18:06:47 -0700 Subject: Fix: assertion could abort on past failures --- test/include/test/test.h | 98 +++++++++++++++++++++++++++--------------------- test/src/test.c | 5 --- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index a1b8ff3..2167e8c 100644 --- 
a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ #define ASSERT_BUFSIZE 256 -#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ +#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) do { \ const t a_ = (a); \ const t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -13,10 +13,17 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(false, \ + t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) + #define expect_ptr_eq(a, b, ...) expect_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) #define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ @@ -210,7 +217,7 @@ #define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define expect_b_eq(a, b, ...) do { \ +#define verify_b_eq(may_abort, a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -223,10 +230,15 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_b_ne(a, b, ...) do { \ + +#define verify_b_ne(may_abort, a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -239,13 +251,21 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) + +#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) +#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) + #define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define expect_str_eq(a, b, ...) do { \ +#define verify_str_eq(may_abort, a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -255,10 +275,15 @@ "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_str_ne(a, b, ...) do { \ + +#define verify_str_ne(may_abort, a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -268,30 +293,35 @@ "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_not_reached(...) do { \ +#define expect_str_eq(a, b, ...) verify_str_eq(false, a, b, __VA_ARGS__) +#define expect_str_ne(a, b, ...) verify_str_ne(false, a, b, __VA_ARGS__) + +#define verify_not_reached(may_abort, ...) 
do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Unreachable code reached: ", \ __func__, __FILE__, __LINE__); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ -} while (0) - -#define p_abort_test_if_failed() do { \ - if (p_test_failed()) { \ + if (may_abort) { \ abort(); \ + } else { \ + p_test_fail(prefix, message); \ } \ } while (0) -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ - expect_cmp(t, a, b, cmp, neg_cmp, pri, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) + +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(true, \ + t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) #define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) @@ -486,33 +516,16 @@ #define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) do { \ - expect_b_eq(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) - -#define assert_b_ne(a, b, ...) do { \ - expect_b_ne(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) +#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__) #define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) #define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, ...) do { \ - expect_str_eq(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_str_eq(a, b, ...) verify_str_eq(true, a, b, __VA_ARGS__) +#define assert_str_ne(a, b, ...) verify_str_ne(true, a, b, __VA_ARGS__) -#define assert_str_ne(a, b, ...) do { \ - expect_str_ne(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) - -#define assert_not_reached(...) do { \ - expect_not_reached(__VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_not_reached(...) 
verify_not_reached(true, __VA_ARGS__) /* * If this enum changes, corresponding changes in test/test.sh.in are also @@ -568,6 +581,5 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); -bool p_test_failed(void); void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index b40fbc6..4583e55 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -233,11 +233,6 @@ p_test_fail(const char *prefix, const char *message) { test_status = test_status_fail; } -bool -p_test_failed() { - return test_status == test_status_fail; -} - void strncpy_cond(void *dst, const char *src, bool cond) { if (cond) { -- cgit v0.12 From 2256ef896177faf8af7b199595382348be054250 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Mar 2020 17:53:41 -0700 Subject: Add option to fetch system thread name on each prof sample --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 5 + src/ctl.c | 5 + src/jemalloc.c | 3 + src/prof.c | 155 +++++++++++++++++++------------ test/unit/mallctl.c | 1 + test/unit/prof_use_sys_thread_name.c | 75 +++++++++++++++ test/unit/prof_use_sys_thread_name.sh | 5 + 8 files changed, 192 insertions(+), 58 deletions(-) create mode 100644 test/unit/prof_use_sys_thread_name.c create mode 100644 test/unit/prof_use_sys_thread_name.sh diff --git a/Makefile.in b/Makefile.in index 7eca2f5..7300cb9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -227,6 +227,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ + $(srcroot)test/unit/prof_use_sys_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 5a32754..3518167 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -28,6 +28,9 @@ extern char opt_prof_prefix[ extern ssize_t opt_prof_recent_alloc_max; extern malloc_mutex_t prof_recent_alloc_mtx; +/* Whether to use thread name provided by the system or by mallctl. */ +extern bool opt_prof_experimental_use_sys_thread_name; + /* Accessed via prof_active_[gs]et{_unlocked,}(). 
*/ extern bool prof_active; @@ -59,6 +62,8 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET +typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); +extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; size_t prof_tdata_count(void); size_t prof_bt_count(void); #endif diff --git a/src/ctl.c b/src/ctl.c index d149ce6..86ac83e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -117,6 +117,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) +CTL_PROTO(opt_prof_experimental_use_sys_thread_name) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -353,6 +354,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_experimental_use_sys_thread_name"), + CTL(opt_prof_experimental_use_sys_thread_name)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -1829,6 +1832,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, + opt_prof_experimental_use_sys_thread_name, bool) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8561ef4..ea331f8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1426,6 +1426,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL( + opt_prof_experimental_use_sys_thread_name, + "prof_experimental_use_sys_thread_name") } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/prof.c b/src/prof.c index 73e6d91..e68694a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -48,6 +48,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof_experimental_use_sys_thread_name = false; /* Accessed via prof_idump_[accum/rollback](). */ static counter_accum_t prof_idump_accumulated; @@ -133,9 +134,101 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; + } + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; + } + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; + } + memcpy(ret, thread_name, size); + return ret; +} + +static int +prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t *tdata; + unsigned i; + char *s; + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return EAGAIN; + } + + /* Validate input. 
*/ + if (thread_name == NULL) { + return EFAULT; + } + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) { + return EFAULT; + } + } + + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + if (s == NULL) { + return EAGAIN; + } + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) { + tdata->thread_name = s; + } + return 0; +} + +static int +prof_read_sys_thread_name_impl(char *buf, size_t limit) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + return pthread_getname_np(pthread_self(), buf, limit); +#else + return ENOSYS; +#endif +} +#ifdef JEMALLOC_JET +prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = + prof_read_sys_thread_name_impl; +#else +#define prof_read_sys_thread_name prof_read_sys_thread_name_impl +#endif + +static void +prof_fetch_sys_thread_name(tsd_t *tsd) { +#define THREAD_NAME_MAX_LEN 16 + char buf[THREAD_NAME_MAX_LEN]; + if (!prof_read_sys_thread_name(buf, THREAD_NAME_MAX_LEN)) { + prof_thread_name_set_impl(tsd, buf); + } +#undef THREAD_NAME_MAX_LEN +} + void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { + if (opt_prof_experimental_use_sys_thread_name) { + prof_fetch_sys_thread_name(tsd); + } + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); @@ -710,29 +803,6 @@ prof_tdata_init(tsd_t *tsd) { NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; @@ -799,42 +869,11 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - unsigned i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; - } - - /* Validate input. 
*/ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { - char c = thread_name[i]; - if (!isgraph(c) && !isblank(c)) { - return EFAULT; - } - } - - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); - if (s == NULL) { - return EAGAIN; - } - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; + if (opt_prof_experimental_use_sys_thread_name) { + return ENOENT; + } else { + return prof_thread_name_set_impl(tsd, thread_name); } - return 0; } bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e38723f..cc1d531 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -192,6 +192,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); + TEST_MALLCTL_OPT(bool, prof_experimental_use_sys_thread_name, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_use_sys_thread_name.c b/test/unit/prof_use_sys_thread_name.c new file mode 100644 index 0000000..60cb55b --- /dev/null +++ b/test/unit/prof_use_sys_thread_name.c @@ -0,0 +1,75 @@ +#include "test/jemalloc_test.h" + +static const char *test_thread_name = "test_name"; + +static int +test_prof_read_sys_thread_name_error(char *buf, size_t limit) { + return ENOSYS; +} + +static int +test_prof_read_sys_thread_name(char *buf, size_t limit) { + assert(strlen(test_thread_name) < limit); + strncpy(buf, test_thread_name, limit); + return 0; +} + +static int +test_prof_read_sys_thread_name_clear(char *buf, size_t limit) { + assert(limit > 0); + buf[0] = '\0'; + return 0; +} + +TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { + test_skip_if(!config_prof); + + bool oldval; + size_t sz = sizeof(oldval); + assert_d_eq(mallctl("opt.prof_experimental_use_sys_thread_name", + &oldval, &sz, NULL, 0), 0, "mallctl failed"); + assert_true(oldval, "option was not set correctly"); + + const char *thread_name; + sz = sizeof(thread_name); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Initial thread name should be empty"); + + thread_name = test_thread_name; + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), + ENOENT, "mallctl write for thread name should fail"); + assert_ptr_eq(thread_name, test_thread_name, + "Thread name should not be touched"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name_error; + void *p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, "", + "Thread name should stay the same if the system call fails"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, test_thread_name, + "Thread name should be changed if the system call succeeds"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name_clear; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Thread name should be updated if the " + "system call returns a different 
name"); +} +TEST_END + +int +main(void) { + return test( + test_prof_experimental_use_sys_thread_name); +} diff --git a/test/unit/prof_use_sys_thread_name.sh b/test/unit/prof_use_sys_thread_name.sh new file mode 100644 index 0000000..0e0e0d9 --- /dev/null +++ b/test/unit/prof_use_sys_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_experimental_use_sys_thread_name:true" +fi -- cgit v0.12 From 3b4a03b92b2e415415a08f0150fdb9eeb659cd52 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 26 Mar 2020 11:40:49 -0700 Subject: Mac: don't declare system functions as nothrow. This contradicts the system headers, which can lead to breakages. --- include/jemalloc/jemalloc_macros.h.in | 6 ++++++ include/jemalloc/jemalloc_protos.h.in | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index b4469d8..1ceb7b1 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -134,3 +134,9 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR #endif + +#if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) +# define JEMALLOC_SYS_NOTHROW +#else +# define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW +#endif diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index a78414b..d75b222 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -8,21 +8,22 @@ extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, const char *s); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@malloc(size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@malloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@calloc(size_t num, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@calloc(size_t num, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW @je_@posix_memalign( + void **memptr, size_t alignment, size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@aligned_alloc(size_t alignment, + void JEMALLOC_SYS_NOTHROW *@je_@aligned_alloc(size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@realloc(void *ptr, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@realloc(void *ptr, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr) +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW @je_@free(void *ptr) JEMALLOC_CXX_THROW; JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -55,12 +56,12 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@memalign(size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif #ifdef 
JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW + void JEMALLOC_SYS_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif -- cgit v0.12 From d936b46d3a6320895ddd9a16dc4c5e79d5b9d8e9 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 29 Mar 2020 10:41:23 -0700 Subject: Add malloc_conf_2_conf_harder This comes in handy when you're just a user of a canary system who wants to change settings set by the configuration system itself. --- Makefile.in | 1 + configure.ac | 3 ++- src/jemalloc.c | 35 ++++++++++++++++++++++++++++++++--- test/unit/malloc_conf_2.c | 29 +++++++++++++++++++++++++++++ test/unit/malloc_conf_2.sh | 1 + 5 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 test/unit/malloc_conf_2.c create mode 100644 test/unit/malloc_conf_2.sh diff --git a/Makefile.in b/Makefile.in index 7300cb9..10af489 100644 --- a/Makefile.in +++ b/Makefile.in @@ -209,6 +209,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/malloc_conf_2.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ diff --git a/configure.ac b/configure.ac index 324656b..daac205 100644 --- a/configure.ac +++ b/configure.ac @@ -1000,7 +1000,8 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib +mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) diff --git a/src/jemalloc.c b/src/jemalloc.c index ea331f8..63f7ebf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -32,6 +32,29 @@ const char *je_malloc_conf JEMALLOC_ATTR(weak) #endif ; +/* + * The usual rule is that the closer to runtime you are, the higher priority + * your configuration settings are (so the jemalloc config options get lower + * priority than the per-binary setting, which gets lower priority than the /etc + * setting, which gets lower priority than the environment settings). + * + * But it's a fairly common use case in some testing environments for a user to + * be able to control the binary, but nothing else (e.g. a performancy canary + * uses the production OS and environment variables, but can run any binary in + * those circumstances). For these use cases, it's handy to have an in-binary + * mechanism for overriding environment variable settings, with the idea that if + * the results are positive they get promoted to the official settings, and + * moved from the binary to the environment variable. + * + * We don't actually want this to be widespread, so we'll give it a silly name + * and not mention it in headers or documentation. 
+ */ +const char *je_malloc_conf_2_conf_harder +#ifndef _WIN32 + JEMALLOC_ATTR(weak) +#endif + ; + bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -975,7 +998,7 @@ malloc_slow_flag_init(void) { } /* Number of sources for initializing malloc_conf */ -#define MALLOC_CONF_NSOURCES 4 +#define MALLOC_CONF_NSOURCES 5 static const char * obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { @@ -1053,6 +1076,9 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { ret = NULL; } break; + } case 4: { + ret = je_malloc_conf_2_conf_harder; + break; } default: not_reached(); ret = NULL; @@ -1069,7 +1095,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "string pointed to by the global variable malloc_conf", "\"name\" of the file referenced by the symbolic link named " "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF" + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", }; unsigned i; const char *opts, *k, *v; @@ -1506,7 +1534,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL}; + const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, + NULL}; char buf[PATH_MAX + 1]; /* The first call only set the confirm_conf option and opts_cache */ diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c new file mode 100644 index 0000000..ecfa499 --- /dev/null +++ b/test/unit/malloc_conf_2.c @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1000"; +const char *malloc_conf_2_conf_harder = "dirty_decay_ms:1234"; + +TEST_BEGIN(test_malloc_conf_2) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker trickery. 
*/ + test_skip_if(windows); + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 1234, + "malloc_conf_2 setting didn't take effect"); +} +TEST_END + +int +main(void) { + return test( + test_malloc_conf_2); +} diff --git a/test/unit/malloc_conf_2.sh b/test/unit/malloc_conf_2.sh new file mode 100644 index 0000000..2c780f1 --- /dev/null +++ b/test/unit/malloc_conf_2.sh @@ -0,0 +1 @@ +export MALLOC_CONF="dirty_decay_ms:500" -- cgit v0.12 From a166c20818e2f5a50c6f0b511ffc5b2ed66b81d2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 26 Mar 2020 18:17:20 -0700 Subject: Make prof_tctx_t pointer a true prof atomic fence --- src/large.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/large.c b/src/large.c index babb307..8982d10 100644 --- a/src/large.c +++ b/src/large.c @@ -360,9 +360,9 @@ large_prof_tctx_reset(edata_t *edata) { void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { - large_prof_tctx_set(edata, tctx); nstime_t t; nstime_init_update(&t); edata_prof_alloc_time_set(edata, &t); edata_prof_recent_alloc_init(edata); + large_prof_tctx_set(edata, tctx); } -- cgit v0.12 From 09cd79495f947a7a2e271eb9bc6ff36b15cfc72f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Mar 2020 10:48:55 -0700 Subject: Encapsulate buffer allocation failure in buffered writer --- include/jemalloc/internal/buf_writer.h | 9 ++--- src/buf_writer.c | 72 ++++++++++++---------------------- src/jemalloc.c | 3 +- src/prof_log.c | 5 +-- src/prof_recent.c | 5 +-- test/unit/buf_writer.c | 12 ++---- 6 files changed, 35 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index c1e2a82..b64c966 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -13,10 +13,8 @@ typedef void (write_cb_t)(void *, const char *); typedef struct { - write_cb_t *public_write_cb; - void *public_cbopaque; - write_cb_t *private_write_cb; - void *private_cbopaque; + write_cb_t *write_cb; + void *cbopaque; char *buf; size_t buf_size; size_t buf_end; @@ -25,9 +23,8 @@ typedef struct { bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); -write_cb_t *buf_writer_get_write_cb(buf_writer_t *buf_writer); -void *buf_writer_get_cbopaque(buf_writer_t *buf_writer); void buf_writer_flush(buf_writer_t *buf_writer); +write_cb_t buf_writer_cb; void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index bb8763b..fd0226a 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -25,28 +25,29 @@ buf_writer_free_internal_buf(tsdn_t *tsdn, void *buf) { } } -static write_cb_t buf_writer_cb; - static void buf_writer_assert(buf_writer_t *buf_writer) { + assert(buf_writer != NULL); + assert(buf_writer->write_cb != NULL); if (buf_writer->buf != NULL) { - assert(buf_writer->public_write_cb == buf_writer_cb); - assert(buf_writer->public_cbopaque == buf_writer); - assert(buf_writer->private_write_cb != buf_writer_cb); - assert(buf_writer->private_cbopaque != buf_writer); assert(buf_writer->buf_size > 0); } else { - assert(buf_writer->public_write_cb != buf_writer_cb); - assert(buf_writer->public_cbopaque != buf_writer); - assert(buf_writer->private_write_cb == NULL); - 
assert(buf_writer->private_cbopaque == NULL); assert(buf_writer->buf_size == 0); + assert(buf_writer->internal_buf); } + assert(buf_writer->buf_end <= buf_writer->buf_size); } bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len) { + if (write_cb != NULL) { + buf_writer->write_cb = write_cb; + } else { + buf_writer->write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + buf_writer->cbopaque = cbopaque; assert(buf_len >= 2); if (buf != NULL) { buf_writer->buf = buf; @@ -56,36 +57,14 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, buf_len); buf_writer->internal_buf = true; } - buf_writer->buf_end = 0; if (buf_writer->buf != NULL) { - buf_writer->public_write_cb = buf_writer_cb; - buf_writer->public_cbopaque = buf_writer; - buf_writer->private_write_cb = write_cb; - buf_writer->private_cbopaque = cbopaque; buf_writer->buf_size = buf_len - 1; /* Allowing for '\0'. */ - buf_writer_assert(buf_writer); - return false; } else { - buf_writer->public_write_cb = write_cb; - buf_writer->public_cbopaque = cbopaque; - buf_writer->private_write_cb = NULL; - buf_writer->private_cbopaque = NULL; buf_writer->buf_size = 0; - buf_writer_assert(buf_writer); - return true; } -} - -write_cb_t * -buf_writer_get_write_cb(buf_writer_t *buf_writer) { - buf_writer_assert(buf_writer); - return buf_writer->public_write_cb; -} - -void * -buf_writer_get_cbopaque(buf_writer_t *buf_writer) { + buf_writer->buf_end = 0; buf_writer_assert(buf_writer); - return buf_writer->public_cbopaque; + return buf_writer->buf == NULL; } void @@ -94,34 +73,31 @@ buf_writer_flush(buf_writer_t *buf_writer) { if (buf_writer->buf == NULL) { return; } - assert(buf_writer->buf_end <= buf_writer->buf_size); buf_writer->buf[buf_writer->buf_end] = '\0'; - if (buf_writer->private_write_cb == NULL) { - buf_writer->private_write_cb = je_malloc_message != NULL ? - je_malloc_message : wrtmessage; - } - assert(buf_writer->private_write_cb != NULL); - buf_writer->private_write_cb(buf_writer->private_cbopaque, - buf_writer->buf); + buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); buf_writer->buf_end = 0; + buf_writer_assert(buf_writer); } -static void +void buf_writer_cb(void *buf_writer_arg, const char *s) { buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; buf_writer_assert(buf_writer); - assert(buf_writer->buf != NULL); - assert(buf_writer->buf_end <= buf_writer->buf_size); - size_t i, slen, n, s_remain, buf_remain; + if (buf_writer->buf == NULL) { + buf_writer->write_cb(buf_writer->cbopaque, s); + return; + } + size_t i, slen, n; for (i = 0, slen = strlen(s); i < slen; i += n) { if (buf_writer->buf_end == buf_writer->buf_size) { buf_writer_flush(buf_writer); } - s_remain = slen - i; - buf_remain = buf_writer->buf_size - buf_writer->buf_end; + size_t s_remain = slen - i; + size_t buf_remain = buf_writer->buf_size - buf_writer->buf_end; n = s_remain < buf_remain ? 
s_remain : buf_remain; memcpy(buf_writer->buf + buf_writer->buf_end, s + i, n); buf_writer->buf_end += n; + buf_writer_assert(buf_writer); } assert(i == slen); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63f7ebf..72eb55b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3847,8 +3847,7 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, write_cb, cbopaque, NULL, STATS_PRINT_BUFSIZE); - stats_print(buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer), opts); + stats_print(buf_writer_cb, &buf_writer, opts); buf_writer_terminate(tsdn, &buf_writer); } diff --git a/src/prof_log.c b/src/prof_log.c index c29fa35..1635979 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -632,9 +632,8 @@ prof_log_stop(tsdn_t *tsdn) { buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer)); + emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, + &buf_writer); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); diff --git a/src/prof_recent.c b/src/prof_recent.c index 7a98cc5..488cf17 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -466,9 +466,8 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer)); + emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, + &buf_writer); emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 01f2411..821cf61 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -24,10 +24,8 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - assert(buf_writer->buf != NULL); - write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); - void *cbopaque = buf_writer_get_cbopaque(buf_writer); + assert(buf_writer->buf != NULL); memset(s, 'a', UNIT_MAX); arg = 4; /* Starting value of random argument. */ arg_store = arg; @@ -39,7 +37,7 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - write_cb(cbopaque, s); + buf_writer_cb(buf_writer, s); remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. 
*/ @@ -89,10 +87,6 @@ TEST_BEGIN(test_buf_write_oom) { assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); assert(buf_writer.buf == NULL); - write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); - assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); - void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - assert_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -108,7 +102,7 @@ TEST_BEGIN(test_buf_write_oom) { test_write_len = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - write_cb(cbopaque, s); + buf_writer_cb(&buf_writer, s); assert_u64_eq(arg_store, arg, "Call back argument didn't get through"); assert_zu_eq(test_write_len, i * unit, -- cgit v0.12 From f9aad7a49b14097a945316f10d2abe179fd0a8a5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Mar 2020 13:47:09 -0700 Subject: Add piping API to buffered writer --- include/jemalloc/internal/buf_writer.h | 4 ++ src/buf_writer.c | 33 ++++++++++++++++ test/unit/buf_writer.c | 70 +++++++++++++++++++++++++++++++++- 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index b64c966..55b18ab 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -27,4 +27,8 @@ void buf_writer_flush(buf_writer_t *buf_writer); write_cb_t buf_writer_cb; void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); +typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit); +void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, + void *read_cbopaque); + #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index fd0226a..06a2735 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -110,3 +110,36 @@ buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { buf_writer_free_internal_buf(tsdn, buf_writer->buf); } } + +void +buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, + void *read_cbopaque) { + /* + * A tiny local buffer in case the buffered writer failed to allocate + * at init. 
+ */ + static char backup_buf[16]; + static buf_writer_t backup_buf_writer; + + buf_writer_assert(buf_writer); + assert(read_cb != NULL); + if (buf_writer->buf == NULL) { + buf_writer_init(TSDN_NULL, &backup_buf_writer, + buf_writer->write_cb, buf_writer->cbopaque, backup_buf, + sizeof(backup_buf)); + buf_writer = &backup_buf_writer; + } + assert(buf_writer->buf != NULL); + ssize_t nread = 0; + do { + buf_writer->buf_end += nread; + buf_writer_assert(buf_writer); + if (buf_writer->buf_end == buf_writer->buf_size) { + buf_writer_flush(buf_writer); + } + nread = read_cb(read_cbopaque, + buf_writer->buf + buf_writer->buf_end, + buf_writer->buf_size - buf_writer->buf_end); + } while (nread > 0); + buf_writer_flush(buf_writer); +} diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 821cf61..d5e63a0 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -119,10 +119,78 @@ TEST_BEGIN(test_buf_write_oom) { } TEST_END +static int test_read_count; +static size_t test_read_len; +static uint64_t arg_sum; + +ssize_t +test_read_cb(void *cbopaque, void *buf, size_t limit) { + static uint64_t rand = 4; + + arg_sum += *(uint64_t *)cbopaque; + assert_zu_gt(limit, 0, "Limit for read_cb must be positive"); + --test_read_count; + if (test_read_count == 0) { + return -1; + } else { + size_t read_len = limit; + if (limit > 1) { + rand = prng_range_u64(&rand, (uint64_t)limit); + read_len -= (size_t)rand; + } + assert(read_len > 0); + memset(buf, 'a', read_len); + size_t prev_test_read_len = test_read_len; + test_read_len += read_len; + assert_zu_le(prev_test_read_len, test_read_len, + "Test read overflowed"); + return read_len; + } +} + +static void +test_buf_writer_pipe_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { + arg = 4; /* Starting value of random argument. */ + for (int count = 5; count > 0; --count) { + arg = prng_lg_range_u64(&arg, 64); + arg_sum = 0; + test_read_count = count; + test_read_len = 0; + test_write_len = 0; + buf_writer_pipe(buf_writer, test_read_cb, &arg); + assert(test_read_count == 0); + expect_u64_eq(arg_sum, arg * count, ""); + expect_zu_eq(test_write_len, test_read_len, + "Write length should be equal to read length"); + } + buf_writer_terminate(tsdn, buf_writer); +} + +TEST_BEGIN(test_buf_write_pipe) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + test_buf, TEST_BUF_SIZE), + "buf_writer_init() should not encounter error on static buffer"); + test_buf_writer_pipe_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_pipe_oom) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + test_buf_writer_pipe_body(tsdn, &buf_writer); +} +TEST_END + int main(void) { return test( test_buf_write_static, test_buf_write_dynamic, - test_buf_write_oom); + test_buf_write_oom, + test_buf_write_pipe, + test_buf_write_pipe_oom); } -- cgit v0.12 From 0d6d9e85866b77b39d39e0957fd2a577b3091935 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 1 Apr 2020 11:16:21 -0700 Subject: configure.ac: Put public symbols on one line. 
--- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index daac205..1c2509a 100644 --- a/configure.ac +++ b/configure.ac @@ -1000,8 +1000,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib -mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) -- cgit v0.12 From c9d56cddf27d52b77fc4e346fd841dcbf31ed671 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 15:04:24 -0700 Subject: Optimize meld in qr module The goal of `qr_meld()` is to change the following four fields `(a->prev, a->prev->next, b->prev, b->prev->next)` from the values `(a->prev, a, b->prev, b)` to `(b->prev, b, a->prev, a)`. This commit changes ``` a->prev->next = b; b->prev->next = a; temp = a->prev; a->prev = b->prev; b->prev = temp; ``` to ``` temp = a->prev; a->prev = b->prev; b->prev = temp; a->prev->next = a; b->prev->next = b; ``` The benefit is that we can use `b->prev->next` for `temp`, and so there's no need to pass in `a_type`. The restriction is that `b` cannot be a `qr_next()` macro, so users of `qr_meld()` must pay attention. (Before this change, neither `a` nor `b` could be a `qr_next()` macro.) --- include/jemalloc/internal/qr.h | 18 ++++++++++-------- test/unit/qr.c | 12 ++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 1e1056b..e5be443 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -32,21 +32,23 @@ struct { \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ - a_type *t; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - t = (a_qr_a)->a_field.qre_prev; \ +/* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ + (a_qr_a)->a_field.qre_prev; \ (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = t; \ + (a_qr_b)->a_field.qre_prev = \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) /* * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_type, a_field) +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) #define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ diff --git a/test/unit/qr.c b/test/unit/qr.c index 95c1692..16eed0e 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -212,22 +212,22 @@ TEST_BEGIN(test_qr_meld_split) { qr_after_insert(&entries[i - 1], &entries[i], link); } - qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_split(&entries[0], &entries[SPLIT_INDEX], link); test_split_entries(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); test_split_entries(entries); - qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_split(&entries[0], &entries[SPLIT_INDEX], link); test_entries_ring(entries); - qr_split(&entries[0], &entries[0], ring_t, link); + qr_split(&entries[0], &entries[0], link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[0], ring_t, link); + qr_meld(&entries[0], &entries[0], link); test_entries_ring(entries); } TEST_END -- cgit v0.12 From 1ad06aa53bc5cca22dde934c3d46b6f683057346 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 16:13:57 -0700 Subject: deduplicate insert and delete logic in qr module --- include/jemalloc/internal/qr.h | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index e5be443..559cbe4 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -18,20 +18,6 @@ struct { \ #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qrelm); \ - (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ - (a_qrelm)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ - (a_qr)->a_field.qre_prev = (a_qrelm); \ - (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ - (a_qrelm)->a_field.qre_next = (a_qr); \ -} while (0) - /* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ #define qr_meld(a_qr_a, a_qr_b, a_field) do { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ @@ -43,6 +29,12 @@ struct { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) +#define qr_before_insert(a_qrelm, a_qr, a_field) \ + qr_meld((a_qrelm), (a_qr), a_field) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) + /* * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
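The identity the two qr commits above rely on can be checked in isolation. The sketch below is not jemalloc code; it is a plain doubly linked ring using the same five assignments as the rewritten qr_meld(), showing that the single operation both joins two rings and, applied again at the same pair, splits them back apart — which is why qr_split() simply forwards to qr_meld(), and why insert/remove can be expressed in terms of meld/split.

```
#include <assert.h>
#include <stddef.h>

typedef struct node_s node_t;
struct node_s {
	node_t *next;
	node_t *prev;
};

static void
ring_new(node_t *n) {
	n->next = n;
	n->prev = n;
}

/* Same five assignments as the new qr_meld(); also serves as "split". */
static void
ring_meld(node_t *a, node_t *b) {
	b->prev->next = a->prev;
	a->prev = b->prev;
	b->prev = b->prev->next;	/* Old a->prev, stashed in step 1. */
	a->prev->next = a;
	b->prev->next = b;
}

int
main(void) {
	node_t n[4];
	for (int i = 0; i < 4; i++) {
		ring_new(&n[i]);
	}
	/* Build two 2-element rings: {0,1} and {2,3}. */
	ring_meld(&n[0], &n[1]);
	ring_meld(&n[2], &n[3]);
	assert(n[0].next == &n[1] && n[1].next == &n[0]);
	/* Meld them into one 4-element ring. */
	ring_meld(&n[0], &n[2]);
	assert(n[0].next == &n[1] && n[1].next == &n[2] &&
	    n[2].next == &n[3] && n[3].next == &n[0]);
	/* The same operation at the same pair splits the ring back in two. */
	ring_meld(&n[0], &n[2]);
	assert(n[0].next == &n[1] && n[1].next == &n[0]);
	assert(n[2].next == &n[3] && n[3].next == &n[2]);
	return 0;
}
```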
@@ -50,14 +42,8 @@ struct { \ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) -#define qr_remove(a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev->a_field.qre_next \ - = (a_qr)->a_field.qre_next; \ - (a_qr)->a_field.qre_next->a_field.qre_prev \ - = (a_qr)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) +#define qr_remove(a_qr, a_field) \ + qr_split(qr_next(a_qr, a_field), (a_qr), a_field) #define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ -- cgit v0.12 From 0dc95a882fee426a62cb93e7fe6a5b1ac171f9a2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 17:02:37 -0700 Subject: Add concat and split functionality to ql module --- include/jemalloc/internal/ql.h | 19 ++++++++++++ test/unit/ql.c | 69 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 8029040..93ddce5 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -58,6 +58,16 @@ struct { \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) +#define ql_concat(a_head_a, a_head_b, a_field) do { \ + if (ql_first(a_head_a) == NULL) { \ + ql_first(a_head_a) = ql_first(a_head_b); \ + } else if (ql_first(a_head_b) != NULL) { \ + qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ + a_field); \ + } \ + ql_first(a_head_b) = NULL; \ +} while (0) + #define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ @@ -79,6 +89,15 @@ struct { \ ql_remove((a_head), t, a_field); \ } while (0) +#define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ + if (ql_first(a_head_a) == (a_elm)) { \ + ql_first(a_head_a) = NULL; \ + } else { \ + qr_split(ql_first(a_head_a), (a_elm), a_field); \ + } \ + ql_first(a_head_b) = (a_elm); \ +} while (0) + #define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index 04da35f..c2b1981 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -192,6 +192,72 @@ TEST_BEGIN(test_ql_insert) { } TEST_END +static void +test_concat_split_entries(list_t *entries, unsigned nentries_a, + unsigned nentries_b) { + init_entries(entries, nentries_a + nentries_b); + + list_head_t head_a; + ql_new(&head_a); + for (unsigned i = 0; i < nentries_a; i++) { + ql_tail_insert(&head_a, &entries[i], link); + } + if (nentries_a == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a); + } + + list_head_t head_b; + ql_new(&head_b); + for (unsigned i = 0; i < nentries_b; i++) { + ql_tail_insert(&head_b, &entries[nentries_a + i], link); + } + if (nentries_b == 0) { + test_empty_list(&head_b); + } else { + test_entries_list(&head_b, entries + nentries_a, nentries_b); + } + + ql_concat(&head_a, &head_b, link); + if (nentries_a + nentries_b == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a + nentries_b); + } + test_empty_list(&head_b); + + if (nentries_b == 0) { + return; + } + + list_head_t head_c; + ql_split(&head_a, &entries[nentries_a], &head_c, link); + if (nentries_a == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a); + } + test_entries_list(&head_c, entries + nentries_a, nentries_b); +} + +TEST_BEGIN(test_ql_concat_split) { + list_t entries[NENTRIES]; + + test_concat_split_entries(entries, 0, 
0); + + test_concat_split_entries(entries, 0, 1); + test_concat_split_entries(entries, 1, 0); + + test_concat_split_entries(entries, 0, NENTRIES); + test_concat_split_entries(entries, 1, NENTRIES - 1); + test_concat_split_entries(entries, NENTRIES / 2, + NENTRIES - NENTRIES / 2); + test_concat_split_entries(entries, NENTRIES - 1, 1); + test_concat_split_entries(entries, NENTRIES, 0); +} +TEST_END + int main(void) { return test( @@ -200,5 +266,6 @@ main(void) { test_ql_tail_remove, test_ql_head_insert, test_ql_head_remove, - test_ql_insert); + test_ql_insert, + test_ql_concat_split); } -- cgit v0.12 From 1dd24ca6d2daeaeb0b9d90f432809508a98b259b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 11:11:08 -0700 Subject: Add rotate functionality to ql module --- include/jemalloc/internal/ql.h | 10 ++++++++++ test/unit/ql.c | 25 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 93ddce5..3b78060 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -98,6 +98,16 @@ struct { \ ql_first(a_head_b) = (a_elm); \ } while (0) +/* + * An optimized version of: + * a_type *t = ql_first(a_head); + * ql_remove((a_head), t, a_field); + * ql_tail_insert((a_head), t, a_field); + */ +#define ql_rotate(a_head, a_field) do { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ +} while (0) + #define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index c2b1981..662d1e8 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -258,6 +258,28 @@ TEST_BEGIN(test_ql_concat_split) { } TEST_END +TEST_BEGIN(test_ql_rotate) { + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) { + ql_tail_insert(&head, &entries[i], link); + } + + char head_id = ql_first(&head)->id; + for (i = 0; i < NENTRIES; i++) { + assert_c_eq(ql_first(&head)->id, head_id, ""); + ql_rotate(&head, link); + assert_c_eq(ql_last(&head, link)->id, head_id, ""); + head_id++; + } + test_entries_list(&head, entries, NENTRIES); +} +TEST_END + int main(void) { return test( @@ -267,5 +289,6 @@ main(void) { test_ql_head_insert, test_ql_head_remove, test_ql_insert, - test_ql_concat_split); + test_ql_concat_split, + test_ql_rotate); } -- cgit v0.12 From a62b7ed92841070932d6aea649ff40933c307cae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:05:16 -0700 Subject: Add emptiness checking to ql module --- include/jemalloc/internal/ql.h | 2 ++ test/unit/ql.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 3b78060..b1ce479 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -18,6 +18,8 @@ struct { \ (a_head)->qlh_first = NULL; \ } while (0) +#define ql_empty(a_head) ((a_head)->qlh_first == NULL) + #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) #define ql_first(a_head) ((a_head)->qlh_first) diff --git a/test/unit/ql.c b/test/unit/ql.c index 662d1e8..8f68938 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -18,6 +18,7 @@ test_empty_list(list_head_t *head) { list_t *t; unsigned i; + expect_true(ql_empty(head), "Unexpected element for empty list"); expect_ptr_null(ql_first(head), "Unexpected element for empty list"); expect_ptr_null(ql_last(head, link), "Unexpected element for empty 
list"); @@ -58,6 +59,7 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { list_t *t; unsigned i; + expect_false(ql_empty(head), "List should not be empty"); expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, "Element id mismatch"); -- cgit v0.12 From 4b66297ea0b0ed2ec5c4421878a31f5b27448624 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:14:24 -0700 Subject: Add move constructor to ql module --- include/jemalloc/internal/ql.h | 5 +++++ test/unit/ql.c | 23 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index b1ce479..16cd88d 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -18,6 +18,11 @@ struct { \ (a_head)->qlh_first = NULL; \ } while (0) +#define ql_move(a_head_dest, a_head_src) do { \ + (a_head_dest)->qlh_first = (a_head_src)->qlh_first; \ + (a_head_src)->qlh_first = NULL; \ +} while (0) + #define ql_empty(a_head) ((a_head)->qlh_first == NULL) #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index 8f68938..f913058 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -282,6 +282,26 @@ TEST_BEGIN(test_ql_rotate) { } TEST_END +TEST_BEGIN(test_ql_move) { + list_head_t head_dest, head_src; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head_src); + ql_move(&head_dest, &head_src); + test_empty_list(&head_src); + test_empty_list(&head_dest); + + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) { + ql_tail_insert(&head_src, &entries[i], link); + } + ql_move(&head_dest, &head_src); + test_empty_list(&head_src); + test_entries_list(&head_dest, entries, NENTRIES); +} +TEST_END + int main(void) { return test( @@ -292,5 +312,6 @@ main(void) { test_ql_head_remove, test_ql_insert, test_ql_concat_split, - test_ql_rotate); + test_ql_rotate, + test_ql_move); } -- cgit v0.12 From ce17af422172b9d924bccfc5d08bb44a10fb0cac Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 15:05:20 -0700 Subject: Better structure ql module --- include/jemalloc/internal/ql.h | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 16cd88d..db67219 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -14,24 +14,25 @@ struct { \ #define ql_elm(a_type) qr(a_type) /* List functions. */ +#define ql_first(a_head) ((a_head)->qlh_first) + #define ql_new(a_head) do { \ - (a_head)->qlh_first = NULL; \ + ql_first(a_head) = NULL; \ } while (0) +#define ql_clear(a_head) ql_new(a_head) + #define ql_move(a_head_dest, a_head_src) do { \ - (a_head_dest)->qlh_first = (a_head_src)->qlh_first; \ - (a_head_src)->qlh_first = NULL; \ + ql_first(a_head_dest) = ql_first(a_head_src); \ + ql_clear(a_head_src); \ } while (0) -#define ql_empty(a_head) ((a_head)->qlh_first == NULL) +#define ql_empty(a_head) (ql_first(a_head) == NULL) #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) - #define ql_last(a_head, a_field) \ - ((ql_first(a_head) != NULL) \ - ? qr_prev(ql_first(a_head), a_field) : NULL) + (ql_empty(a_head) ? 
NULL : qr_prev(ql_first(a_head), a_field)) #define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ @@ -52,27 +53,27 @@ struct { \ qr_after_insert((a_qlelm), (a_elm), a_field) #define ql_head_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ + if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) #define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ + if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) #define ql_concat(a_head_a, a_head_b, a_field) do { \ - if (ql_first(a_head_a) == NULL) { \ - ql_first(a_head_a) = ql_first(a_head_b); \ - } else if (ql_first(a_head_b) != NULL) { \ + if (ql_empty(a_head_a)) { \ + ql_move(a_head_a, a_head_b); \ + } else if (!ql_empty(a_head_b)) { \ qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ a_field); \ + ql_clear(a_head_b); \ } \ - ql_first(a_head_b) = NULL; \ } while (0) #define ql_remove(a_head, a_elm, a_field) do { \ @@ -82,7 +83,7 @@ struct { \ if (ql_first(a_head) != (a_elm)) { \ qr_remove((a_elm), a_field); \ } else { \ - ql_first(a_head) = NULL; \ + ql_clear(a_head); \ } \ } while (0) @@ -98,11 +99,11 @@ struct { \ #define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ if (ql_first(a_head_a) == (a_elm)) { \ - ql_first(a_head_a) = NULL; \ + ql_move(a_head_b, a_head_a); \ } else { \ qr_split(ql_first(a_head_a), (a_elm), a_field); \ + ql_first(a_head_b) = (a_elm); \ } \ - ql_first(a_head_b) = (a_elm); \ } while (0) /* -- cgit v0.12 From 8da6676a029f128753941eedcf2a8b4389cd80f1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 11:12:53 -0700 Subject: Don't do reentrant testing in junk tests. --- test/unit/junk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/unit/junk.c b/test/unit/junk.c index 5a74c3d..314da3c 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -185,7 +185,11 @@ int main(void) { junk_alloc_callback = &test_junk; junk_free_callback = &test_junk; - return test( + /* + * We check the last pointer junked. If a reentrant call happens, that + * might be an internal allocation. + */ + return test_no_reentrancy( test_junk_alloc_free, test_realloc_expand); } -- cgit v0.12 From a5ddfa7d91f96cb1b648c6808488682e96880eb7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 10:48:58 -0700 Subject: Use ql for prof last-N list --- include/jemalloc/internal/prof_structs.h | 2 +- src/prof_recent.c | 125 +++++++++++++++---------------- 2 files changed, 61 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 977eb1c..73ef8fc 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -203,7 +203,7 @@ struct prof_recent_s { nstime_t alloc_time; nstime_t dalloc_time; - prof_recent_t *next; + ql_elm(prof_recent_t) link; size_t size; prof_tctx_t *alloc_tctx; edata_t *alloc_edata; /* NULL means allocation has been freed. 
*/ diff --git a/src/prof_recent.c b/src/prof_recent.c index 488cf17..185e2b6 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -14,11 +14,13 @@ # define STATIC_INLINE_IF_NOT_TEST #endif +typedef ql_head(prof_recent_t) prof_recent_list_t; + ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -static prof_recent_t *prof_recent_alloc_dummy = NULL; +static prof_recent_list_t prof_recent_alloc_list; static void prof_recent_alloc_max_init() { @@ -204,29 +206,26 @@ prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_begin(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - return prof_recent_alloc_dummy->next; + return ql_first(&prof_recent_alloc_list); } STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_end(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - return prof_recent_alloc_dummy; + return NULL; } STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - assert(node != NULL && node != prof_recent_alloc_dummy); - return node->next; + assert(node != NULL); + return ql_next(&prof_recent_alloc_list, node, link); } static bool prof_recent_alloc_is_empty(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)) { + if (ql_empty(&prof_recent_alloc_list)) { assert(prof_recent_alloc_count == 0); return true; } else { @@ -238,17 +237,17 @@ prof_recent_alloc_is_empty(tsd_t *tsd) { static void prof_recent_alloc_assert_count(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (config_debug) { - ssize_t count = 0; - prof_recent_t *n = prof_recent_alloc_begin(tsd); - while (n != prof_recent_alloc_end(tsd)) { - ++count; - n = prof_recent_alloc_next(tsd, n); - } - assert(count == prof_recent_alloc_count); - assert(prof_recent_alloc_max_get(tsd) == -1 || - count <= prof_recent_alloc_max_get(tsd)); + if (!config_debug) { + return; + } + ssize_t count = 0; + prof_recent_t *n; + ql_foreach(n, &prof_recent_alloc_list, link) { + ++count; } + assert(count == prof_recent_alloc_count); + assert(prof_recent_alloc_max_get(tsd) == -1 || + count <= prof_recent_alloc_max_get(tsd)); } void @@ -311,45 +310,42 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { goto label_rollback; } - assert(prof_recent_alloc_dummy != NULL); - { - /* Fill content into the dummy node. */ - prof_recent_t *node = prof_recent_alloc_dummy; - node->size = size; - nstime_copy(&node->alloc_time, - edata_prof_alloc_time_get(edata)); - node->alloc_tctx = tctx; - edata_prof_recent_alloc_set(tsd, edata, node); - nstime_init_zero(&node->dalloc_time); - node->dalloc_tctx = NULL; - } - prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx; if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) { - /* If upper limit is reached, simply shift the dummy. */ + /* If upper limit is reached, rotate the head. 
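The rotate-at-capacity idea used here can be illustrated without jemalloc's ql macros. The sketch below keeps the last N record sizes in a plain fixed array: while below capacity it appends, and once full it overwrites the oldest slot and advances the head — the same reuse-the-head-as-the-new-tail effect that ql_rotate() gives the prof_recent list. The names and the array representation are illustrative only.

```
#include <stddef.h>
#include <stdio.h>

#define LAST_N 4

typedef struct {
	size_t sizes[LAST_N];
	size_t count;	/* Slots in use, <= LAST_N. */
	size_t head;	/* Index of the oldest record once full. */
} last_n_t;

/* Append while below capacity; afterwards, recycle the oldest slot. */
static void
last_n_record(last_n_t *l, size_t size) {
	size_t slot;
	if (l->count < LAST_N) {
		slot = l->count++;
	} else {
		slot = l->head;
		l->head = (l->head + 1) % LAST_N;
	}
	l->sizes[slot] = size;
}

int
main(void) {
	last_n_t l = {.count = 0, .head = 0};
	for (size_t i = 1; i <= 10; i++) {
		last_n_record(&l, i * 100);
	}
	/* Only the last LAST_N sizes (700, 800, 900, 1000) survive. */
	for (size_t i = 0; i < LAST_N; i++) {
		printf("%zu\n", l.sizes[(l.head + i) % LAST_N]);
	}
	return 0;
}
```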
*/ assert(prof_recent_alloc_max_get(tsd) != -1); assert(!prof_recent_alloc_is_empty(tsd)); - prof_recent_alloc_dummy = prof_recent_alloc_dummy->next; - old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx; + prof_recent_t *head = ql_first(&prof_recent_alloc_list); + old_alloc_tctx = head->alloc_tctx; assert(old_alloc_tctx != NULL); - old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx; - prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy); + old_dalloc_tctx = head->dalloc_tctx; + prof_recent_alloc_evict_edata(tsd, head); + ql_rotate(&prof_recent_alloc_list, link); } else { - /* Otherwise use the new node as the dummy. */ + /* Otherwise make use of the new node. */ assert(prof_recent_alloc_max_get(tsd) == -1 || prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); if (reserve == NULL) { goto label_rollback; } - reserve->next = prof_recent_alloc_dummy->next; - prof_recent_alloc_dummy->next = reserve; - prof_recent_alloc_dummy = reserve; + ql_elm_new(reserve, link); + ql_tail_insert(&prof_recent_alloc_list, reserve, link); reserve = NULL; old_alloc_tctx = NULL; old_dalloc_tctx = NULL; ++prof_recent_alloc_count; } + /* Fill content into the tail node. */ + prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link); + assert(tail != NULL); + tail->size = size; + nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); + tail->alloc_tctx = tctx; + edata_prof_recent_alloc_set(tsd, edata, tail); + nstime_init_zero(&tail->dalloc_time); + tail->dalloc_tctx = NULL; + assert(!prof_recent_alloc_is_empty(tsd)); prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -403,19 +399,27 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { return old_max; } - prof_recent_t *begin = prof_recent_alloc_dummy->next; /* For verification purpose only. */ ssize_t count = prof_recent_alloc_count - max; - do { - assert(!prof_recent_alloc_is_empty(tsd)); - prof_recent_t *node = prof_recent_alloc_dummy->next; - assert(node != prof_recent_alloc_dummy); + prof_recent_t *node; + ql_foreach(node, &prof_recent_alloc_list, link) { + if (prof_recent_alloc_count == max) { + break; + } prof_recent_alloc_evict_edata(tsd, node); - prof_recent_alloc_dummy->next = node->next; --prof_recent_alloc_count; - } while (prof_recent_alloc_count > max); - prof_recent_t *end = prof_recent_alloc_dummy->next; - assert(begin != end); + } + assert(prof_recent_alloc_count == max); + + prof_recent_list_t old_list; + ql_move(&old_list, &prof_recent_alloc_list); + if (max == 0) { + assert(node == NULL); + } else { + assert(node != NULL); + ql_split(&old_list, node, &prof_recent_alloc_list, link); + } + assert(!ql_empty(&old_list)); prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -432,15 +436,15 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { * to and controlled by application. 
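The surrounding function also shows a pattern that ql_move() and ql_split() make cheap: detach the surplus nodes in O(1) while the mutex is held, then do the per-node teardown only after unlocking. A stand-alone sketch of that shape (pthreads and bare singly linked nodes, not jemalloc's types):

```
#include <pthread.h>
#include <stddef.h>
#include <stdlib.h>

typedef struct node_s node_t;
struct node_s {
	node_t *next;
};

static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;
static node_t *list_head;	/* Protected by list_mtx. */

/*
 * Trim the shared list down to nothing: detach the whole chain under the
 * lock (a constant-time pointer move, like ql_move()), then free the nodes
 * after unlocking so the teardown never runs with list_mtx held.
 */
static void
list_trim_all(void) {
	pthread_mutex_lock(&list_mtx);
	node_t *detached = list_head;
	list_head = NULL;
	pthread_mutex_unlock(&list_mtx);

	while (detached != NULL) {
		node_t *next = detached->next;
		free(detached);
		detached = next;
	}
}
```

The real code splits at a boundary node rather than taking everything, but the lock-scope shape is the same.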
*/ do { - prof_recent_t *node = begin; + node = ql_first(&old_list); + ql_remove(&old_list, node, link); decrement_recent_count(tsd, node->alloc_tctx); if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } - begin = node->next; idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); --count; - } while (begin != end); + } while (!ql_empty(&old_list)); assert(count == 0); return old_max; @@ -482,9 +486,8 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); - for (prof_recent_t *n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + prof_recent_t *n; + ql_foreach(n, &prof_recent_alloc_list, link) { emitter_json_object_begin(&emitter); emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); @@ -541,15 +544,7 @@ prof_recent_init() { return true; } - assert(prof_recent_alloc_dummy == NULL); - prof_recent_alloc_dummy = (prof_recent_t *)iallocztm( - TSDN_NULL, sizeof(prof_recent_t), - sz_size2index(sizeof(prof_recent_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (prof_recent_alloc_dummy == NULL) { - return true; - } - prof_recent_alloc_dummy->next = prof_recent_alloc_dummy; + ql_new(&prof_recent_alloc_list); return false; } -- cgit v0.12 From 2deabac079440f843f833f1fe121bc62dff8092c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:40:22 -0700 Subject: Get rid of custom iterator for last-N records --- include/jemalloc/internal/prof_recent.h | 5 ++-- src/prof_recent.c | 38 +++++++--------------------- test/unit/prof_recent.c | 44 +++++++++++---------------------- 3 files changed, 25 insertions(+), 62 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index b2973db..d0869ae 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -7,9 +7,8 @@ void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET -prof_recent_t *prof_recent_alloc_begin(tsd_t *tsd); -prof_recent_t *prof_recent_alloc_end(tsd_t *tsd); -prof_recent_t *prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node); +typedef ql_head(prof_recent_t) prof_recent_list_t; +extern prof_recent_list_t prof_recent_alloc_list; prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); #endif diff --git a/src/prof_recent.c b/src/prof_recent.c index 185e2b6..88effc4 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -8,19 +8,15 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" -#ifndef JEMALLOC_JET -# define STATIC_INLINE_IF_NOT_TEST static inline -#else -# define STATIC_INLINE_IF_NOT_TEST -#endif - -typedef ql_head(prof_recent_t) prof_recent_list_t; - ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -static prof_recent_list_t prof_recent_alloc_list; +#ifndef JEMALLOC_JET +typedef ql_head(prof_recent_t) prof_recent_list_t; +static +#endif +prof_recent_list_t prof_recent_alloc_list; static void prof_recent_alloc_max_init() { @@ -102,7 +98,10 @@ edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { return 
edata_prof_recent_alloc_get_dont_call_directly(edata); } -STATIC_INLINE_IF_NOT_TEST prof_recent_t * +#ifndef JEMALLOC_JET +static inline +#endif +prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = @@ -203,25 +202,6 @@ prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { } } -STATIC_INLINE_IF_NOT_TEST prof_recent_t * -prof_recent_alloc_begin(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return ql_first(&prof_recent_alloc_list); -} - -STATIC_INLINE_IF_NOT_TEST prof_recent_t * -prof_recent_alloc_end(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return NULL; -} - -STATIC_INLINE_IF_NOT_TEST prof_recent_t * -prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(node != NULL); - return ql_next(&prof_recent_alloc_list, node, link); -} - static bool prof_recent_alloc_is_empty(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 35a2333..e19d994 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -172,8 +172,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_ne(prof_recent_alloc_begin(tsd), - prof_recent_alloc_end(tsd), + assert_false(ql_empty(&prof_recent_alloc_list), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -187,9 +186,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { @@ -220,9 +217,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); confirm_record_released(tsd, n); ++c; @@ -251,9 +246,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_malloc(tsd, p); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, /* Is the allocation from the third batch? 
*/ @@ -283,9 +276,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); ++c; @@ -303,9 +294,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); ++c; @@ -323,9 +312,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); @@ -340,9 +327,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); @@ -356,13 +341,12 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - n = prof_recent_alloc_begin(tsd); - assert_ptr_ne(n, prof_recent_alloc_end(tsd), "Recent list is empty"); + assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); + n = ql_first(&prof_recent_alloc_list); confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); confirm_record_released(tsd, n); - n = prof_recent_alloc_next(tsd, n); - assert_ptr_eq(n, prof_recent_alloc_end(tsd), - "Recent list should be empty"); + n = ql_next(&prof_recent_alloc_list, n, link); + assert_ptr_null(n, "Recent list should only contain one record"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. 
*/ @@ -370,7 +354,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -379,7 +363,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); -- cgit v0.12 From c4e9ea8cc6c039af4f14f9e3ad7d92555693adbf Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 6 Apr 2020 16:19:53 -0700 Subject: Get rid of locks in prof recent test --- include/jemalloc/internal/prof_recent.h | 2 +- src/prof_recent.c | 10 ++-- test/unit/prof_recent.c | 81 +++++++++++---------------------- 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d0869ae..bd04652 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -9,7 +9,7 @@ void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; -prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); +prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); #endif #endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/src/prof_recent.c b/src/prof_recent.c index 88effc4..7fd77e9 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -93,15 +93,15 @@ edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } -static inline prof_recent_t * -edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { - return edata_prof_recent_alloc_get_dont_call_directly(edata); -} - #ifndef JEMALLOC_JET static inline #endif prof_recent_t * +edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + return edata_prof_recent_alloc_get_dont_call_directly(edata); +} + +static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e19d994..19ff15f 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,43 +101,38 @@ TEST_END #define NTH_REQ_SIZE(n) ((n) * 97 + 101) static void -confirm_malloc(tsd_t *tsd, void *p) { +confirm_malloc(void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); + prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_eq(e, n->alloc_edata, "edata pointer in record is not 
correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void -confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_size(prof_recent_t *n, unsigned kth) { expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void -confirm_record_living(tsd_t *tsd, prof_recent_t *n) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); assert_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(n->alloc_edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } static void -confirm_record_released(tsd_t *tsd, prof_recent_t *n) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_null(n->alloc_edata, @@ -168,14 +163,10 @@ TEST_BEGIN(test_prof_recent_alloc) { for (i = 0; i < 2 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - confirm_malloc(tsd, p); + confirm_malloc(p); if (i < OPT_ALLOC_MAX - 1) { - malloc_mutex_lock(tsd_tsdn(tsd), - &prof_recent_alloc_mtx); assert_false(ql_empty(&prof_recent_alloc_list), "Empty recent allocation"); - malloc_mutex_unlock(tsd_tsdn(tsd), - &prof_recent_alloc_mtx); free(p); /* * The recorded allocations may still include some @@ -185,17 +176,15 @@ TEST_BEGIN(test_prof_recent_alloc) { continue; } c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); + confirm_record_size(n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { - confirm_record_living(tsd, n); + confirm_record_living(n); } else { - confirm_record_released(tsd, n); + confirm_record_released(n); } } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -216,13 +205,11 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -243,12 +230,11 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 4 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - confirm_malloc(tsd, p); + confirm_malloc(p); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, + confirm_record_size(n, /* Is the allocation from the third batch? */ i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? /* If yes, then it's just recorded. 
*/ @@ -259,12 +245,11 @@ TEST_BEGIN(test_prof_recent_alloc) { */ i + c - 2 * OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { - confirm_record_living(tsd, n); + confirm_record_living(n); } else { - confirm_record_released(tsd, n); + confirm_record_released(n); } } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -275,13 +260,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); @@ -293,13 +276,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); @@ -311,13 +292,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); @@ -326,13 +305,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); @@ -340,32 +317,26 @@ TEST_BEGIN(test_prof_recent_alloc) { future = 1; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); n = ql_first(&prof_recent_alloc_list); - confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); - confirm_record_released(tsd, n); + confirm_record_size(n, 4 * OPT_ALLOC_MAX - 1); + confirm_record_released(n); n = ql_next(&prof_recent_alloc_list, n, link); assert_ptr_null(n, "Recent list should only contain one record"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. 
*/ future = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Restore the settings. */ future = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); confirm_prof_setup(tsd); } -- cgit v0.12 From 12be9f5727e382c96656f9469e9702322ccd0c73 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 7 Mar 2020 20:14:49 -0800 Subject: Add a stub PA module -- a page allocator. --- Makefile.in | 1 + include/jemalloc/internal/pa.h | 9 +++++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/pa.c | 2 ++ 7 files changed, 20 insertions(+) create mode 100644 include/jemalloc/internal/pa.h create mode 100644 src/pa.c diff --git a/Makefile.in b/Makefile.in index 10af489..a3c43a6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -125,6 +125,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ + $(srcroot)src/pa.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h new file mode 100644 index 0000000..5146ae1 --- /dev/null +++ b/include/jemalloc/internal/pa.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_PA_H +#define JEMALLOC_INTERNAL_PA_H + +/* + * The page allocator; responsible for acquiring pages of memory for + * allocations. 
+ */ + +#endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 920d55e..3c17e50 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index fe77170..2f5ed62 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 2db9401..d63042d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index fe77170..2f5ed62 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/src/pa.c b/src/pa.c new file mode 100644 index 0000000..3a26b39 --- /dev/null +++ b/src/pa.c @@ -0,0 +1,2 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" -- cgit v0.12 From 585f92505521136157aad8ac2e9288609127f863 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 10:11:02 -0700 Subject: Move cache index randomization out of extent. This is logically at a higher level of the stack; extent should just allocate things at the page-level; it shouldn't care exactly why the callers wants a given number of pages. 
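For context on the feature being relocated: cache-index randomization offsets a padded large allocation's start address by a random, cacheline-granular amount below one page, so equally sized allocations do not all map to the same cache sets. The sketch below mirrors only the shift arithmetic of arena_cache_oblivious_randomize(); the page/cacheline sizes and the PRNG are stand-in assumptions, not jemalloc's prng_lg_range_u64().

```
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LG_PAGE		12	/* Assume 4 KiB pages for the sketch. */
#define LG_CACHELINE	6	/* Assume 64-byte cache lines. */

/* Stand-in PRNG returning lg_range random bits (not jemalloc's). */
static uint64_t
prng_bits(uint64_t *state, unsigned lg_range) {
	*state = *state * 6364136223846793005ULL + 1442695040888963407ULL;
	return *state >> (64 - lg_range);
}

/*
 * Pick a random offset that is a multiple of the (cacheline-rounded)
 * alignment and strictly less than one page -- the same shift arithmetic
 * as arena_cache_oblivious_randomize().
 */
static uintptr_t
random_cache_offset(uint64_t *state, unsigned lg_alignment) {
	unsigned lg_step = lg_alignment < LG_CACHELINE ?
	    LG_CACHELINE : lg_alignment;
	assert(lg_step < LG_PAGE);
	unsigned lg_range = LG_PAGE - lg_step;
	uintptr_t offset = (uintptr_t)prng_bits(state, lg_range) << lg_step;
	assert(offset < ((uintptr_t)1 << LG_PAGE));
	assert((offset & (((uintptr_t)1 << lg_step) - 1)) == 0);
	return offset;
}

int
main(void) {
	uint64_t state = 42;
	for (int i = 0; i < 4; i++) {
		printf("offset = %#lx\n", (unsigned long)
		    random_cache_offset(&state, LG_CACHELINE));
	}
	return 0;
}
```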
--- include/jemalloc/internal/arena_inlines_b.h | 26 ++++++ include/jemalloc/internal/extent.h | 12 +-- src/arena.c | 24 ++--- src/extent.c | 130 ++++++++++------------------ src/large.c | 6 +- 5 files changed, 93 insertions(+), 105 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index e7f7b85..cadfc8f 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -397,4 +397,30 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } } +static inline void +arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, + size_t alignment) { + assert(edata_base_get(edata) == edata_addr_get(edata)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + edata->e_addr = (void *)((uintptr_t)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == + edata->e_addr); + } +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d0ba70b..e615fb6 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,19 +20,19 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero); void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); diff --git a/src/arena.c b/src/arena.c index d4b6979..f6876e3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -433,24 +433,24 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; + size_t esize = usize + sz_large_pad; edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero); + NULL, esize, alignment, false, szind, zero); if (edata == NULL && arena_may_have_muzzy(arena)) { edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, usize, sz_large_pad, alignment, 
false, szind, zero); + NULL, esize, alignment, false, szind, zero); } - size_t size = usize + sz_large_pad; if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, NULL, usize, sz_large_pad, - alignment, false, szind, zero); + &arena->ecache_retained, NULL, esize, alignment, false, + szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add * isn't used below, so there's no need to conditionlly * set it to 0 here. */ - mapped_add = size; + mapped_add = esize; } } else if (config_stats) { mapped_add = 0; @@ -466,7 +466,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_stats_unlock(tsdn, &arena->stats); } - arena_nactive_add(arena, size >> LG_PAGE); + arena_nactive_add(arena, esize >> LG_PAGE); + } + + if (edata != NULL && sz_large_pad != 0) { + arena_cache_oblivious_randomize(tsdn, arena, edata, alignment); } return edata; @@ -1207,7 +1211,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero); + NULL, bin_info->slab_size, PAGE, true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1227,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); + NULL, bin_info->slab_size, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); + NULL, bin_info->slab_size, PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, diff --git a/src/extent.c b/src/extent.c index 87dcec3..54ac40b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -40,45 +40,19 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); + ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); /******************************************************************************/ -static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t alignment) { - assert(edata_base_get(edata) == edata_addr_get(edata)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - 
lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - edata->e_addr = (void *)((uintptr_t)edata->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == - edata->e_addr); - } -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -97,32 +71,32 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t * ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero) { - assert(size + pad != 0); + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero) { + assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, zero, &commit, false); + size, alignment, slab, szind, zero, &commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } edata_t * ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero) { - assert(size + pad != 0); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero) { + assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, &commit); + size, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -133,8 +107,8 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, &commit); + edata = extent_alloc_wrapper(tsdn, arena, ehooks, new_addr, + size, alignment, slab, szind, zero, &commit); } assert(edata == NULL || edata_dumpable_get(edata)); @@ -382,8 +356,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -400,11 +374,9 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * course cannot be recycled). 
*/ assert(PAGE_ADDR2BASE(new_addr) == new_addr); - assert(pad == 0); assert(alignment <= PAGE); } - size_t esize = size + pad; malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { @@ -418,7 +390,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); if (edata_arena_ind_get(edata) != arena_ind_get(arena) - || edata_size_get(edata) < esize + || edata_size_get(edata) < size || edata_state_get(edata) != ecache->state) { edata = NULL; @@ -426,7 +398,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, esize, alignment, + edata = eset_fit(&ecache->eset, size, alignment, ecache->delay_coalesce); } if (edata == NULL) { @@ -472,16 +444,15 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool growing_retained) { - size_t esize = size + pad; + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); - if (edata_size_get(*edata) < leadsize + esize) { + if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; } - size_t trailsize = edata_size_get(*edata) - leadsize - esize; + size_t trailsize = edata_size_get(*edata) - leadsize - size; *lead = NULL; *trail = NULL; @@ -492,7 +463,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, + *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; @@ -504,7 +475,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *edata, esize, szind, slab, trailsize, SC_NSIZES, false, + *edata, size, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *edata; @@ -530,8 +501,8 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, edata_t *edata, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -539,7 +510,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, pad, alignment, slab, szind, growing_retained); + new_addr, size, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -588,22 +559,21 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); assert(!*zero || !slab); edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, - new_addr, size, pad, alignment, slab, growing_retained); + new_addr, size, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, edata, growing_retained); + size, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -624,9 +594,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, *zero = true; } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); @@ -650,16 +617,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, */ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); - assert(pad == 0 || !slab); assert(!*zero || !slab); - size_t esize = size + pad; - size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; + size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ - if (alloc_size_min < esize) { + if (alloc_size_min < size) { goto label_err; } /* @@ -715,8 +680,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, &edata, &lead, &trail, &to_leak, - &to_salvage, NULL, size, pad, alignment, slab, szind, true); + arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + size, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -783,9 +748,6 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } if (slab) { edata_slab_set(edata, true); emap_register_interior(tsdn, &emap_global, edata, szind); @@ -804,23 +766,23 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->ecache_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit, true); + &arena->ecache_retained, new_addr, size, alignment, slab, szind, + zero, commit, true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, + edata = extent_grow_retained(tsdn, arena, ehooks, size, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { @@ -833,29 +795,25 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - size_t esize = size + pad; edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; } - edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, + edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; diff --git a/src/large.c b/src/large.c index 8982d10..1899a46 100644 --- a/src/large.c +++ b/src/large.c @@ -120,10 +120,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, edata_t *trail; bool new_mapping; if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; @@ -131,7 +131,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, - 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } -- cgit v0.12 From a24faed56915df38c5ab67b66cefbb596c0e165c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 10:35:56 -0700 Subject: PA: Move in the ecache_t objects. --- include/jemalloc/internal/arena_structs.h | 12 +--- include/jemalloc/internal/pa.h | 13 ++++ src/arena.c | 112 ++++++++++++++++-------------- src/background_thread.c | 8 +-- src/ctl.c | 6 +- src/extent.c | 20 +++--- src/large.c | 16 ++--- 7 files changed, 102 insertions(+), 85 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index fde540a..23fa424 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -12,6 +12,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pa.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/smoothstep.h" @@ -150,15 +151,8 @@ struct arena_s { /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; - /* - * Collections of extents that were previously allocated. 
These are - * used when allocating extents, in an attempt to re-use address space. - * - * Synchronization: internal. - */ - ecache_t ecache_dirty; - ecache_t ecache_muzzy; - ecache_t ecache_retained; + /* The page-level allocator shard this arena uses. */ + pa_shard_t pa_shard; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5146ae1..4e73f10 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -6,4 +6,17 @@ * allocations. */ +typedef struct pa_shard_s pa_shard_t; +struct pa_shard_s { + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; +}; + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index f6876e3..d9932b1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -74,8 +74,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&arena->ecache_dirty); - *nmuzzy += ecache_npages_get(&arena->ecache_muzzy); + *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); + *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); } void @@ -98,7 +98,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - ecache_npages_get(&arena->ecache_retained) << LG_PAGE); + ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), @@ -129,8 +129,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - ecache_npages_get(&arena->ecache_dirty) + - ecache_npages_get(&arena->ecache_muzzy)) << LG_PAGE))); + ecache_npages_get(&arena->pa_shard.ecache_dirty) + + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -172,12 +172,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = ecache_nextents_get(&arena->ecache_dirty, i); - muzzy = ecache_nextents_get(&arena->ecache_muzzy, i); - retained = ecache_nextents_get(&arena->ecache_retained, i); - dirty_bytes = ecache_nbytes_get(&arena->ecache_dirty, i); - muzzy_bytes = ecache_nbytes_get(&arena->ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&arena->ecache_retained, i); + dirty = ecache_nextents_get(&arena->pa_shard.ecache_dirty, i); + muzzy = ecache_nextents_get(&arena->pa_shard.ecache_muzzy, i); + retained = ecache_nextents_get(&arena->pa_shard.ecache_retained, + i); + dirty_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_dirty, + i); + muzzy_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_muzzy, + i); + retained_bytes = ecache_nbytes_get( + 
&arena->pa_shard.ecache_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -226,11 +230,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(ecache_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(ecache_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(ecache_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, arena_prof_mutex_extents_retained) READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -258,7 +262,8 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); + ecache_dalloc(tsdn, arena, ehooks, &arena->pa_shard.ecache_dirty, + edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,16 +439,18 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, esize, alignment, false, szind, zero); + edata_t *edata = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, + zero); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, esize, alignment, false, szind, zero); + edata = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, + false, szind, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, NULL, esize, alignment, false, - szind, zero); + &arena->pa_shard.ecache_retained, NULL, esize, alignment, + false, szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -808,14 +815,14 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->ecache_dirty, decay_ms); + &arena->pa_shard.ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->ecache_muzzy, decay_ms); + &arena->pa_shard.ecache_muzzy, decay_ms); } static size_t @@ -867,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, edata_size_get(edata))) { ecache_dalloc(tsdn, arena, ehooks, - &arena->ecache_muzzy, edata); + &arena->pa_shard.ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -978,18 +985,18 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->ecache_dirty, is_background_thread, all); + &arena->pa_shard.ecache_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool 
all) { - if (ecache_npages_get(&arena->ecache_muzzy) == 0 && + if (ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0 && arena_muzzy_decay_ms_get(arena) <= 0) { return false; } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->ecache_muzzy, is_background_thread, all); + &arena->pa_shard.ecache_muzzy, is_background_thread, all); } void @@ -1159,7 +1166,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; while ((edata = ecache_evict(tsdn, arena, ehooks, - &arena->ecache_retained, 0)) != NULL) { + &arena->pa_shard.ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1175,8 +1182,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(ecache_npages_get(&arena->ecache_dirty) == 0); - assert(ecache_npages_get(&arena->ecache_muzzy) == 0); + assert(ecache_npages_get(&arena->pa_shard.ecache_dirty) == 0); + assert(ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1210,8 +1217,9 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); zero = false; - slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, PAGE, true, szind, &zero); + slab = ecache_alloc_grow(tsdn, arena, ehooks, + &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, + true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1230,11 +1238,13 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, PAGE, true, binind, &zero); + edata_t *slab = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, + true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, PAGE, true, binind, &zero); + slab = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, + PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, @@ -2023,16 +2033,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, ind, - true)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_dirty, extent_state_dirty, + ind, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, ind, - false)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_muzzy, extent_state_muzzy, + ind, false)) { goto label_error; } /* @@ -2041,8 +2051,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, - ind, false)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_retained, + extent_state_retained, ind, false)) { goto label_error; } @@ -2198,9 +2208,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - ecache_prefork(tsdn, &arena->ecache_dirty); - ecache_prefork(tsdn, &arena->ecache_muzzy); - ecache_prefork(tsdn, &arena->ecache_retained); + ecache_prefork(tsdn, &arena->pa_shard.ecache_dirty); + ecache_prefork(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_prefork(tsdn, &arena->pa_shard.ecache_retained); } void @@ -2240,9 +2250,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); edata_cache_postfork_parent(tsdn, &arena->edata_cache); - ecache_postfork_parent(tsdn, &arena->ecache_dirty); - ecache_postfork_parent(tsdn, &arena->ecache_muzzy); - ecache_postfork_parent(tsdn, &arena->ecache_retained); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_parent(tsdn, &arena->ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2286,9 +2296,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); edata_cache_postfork_child(tsdn, &arena->edata_cache); - ecache_postfork_child(tsdn, &arena->ecache_dirty); - ecache_postfork_child(tsdn, &arena->ecache_muzzy); - ecache_postfork_child(tsdn, &arena->ecache_retained); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_child(tsdn, &arena->ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); diff --git a/src/background_thread.c b/src/background_thread.c index ca06be0..ddfe3a3 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->ecache_dirty); + &arena->pa_shard.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->ecache_muzzy); + &arena->pa_shard.ecache_muzzy); return i1 < i2 ? 
i1 : i2; } @@ -717,8 +717,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->ecache_dirty) > 0 || - ecache_npages_get(&arena->ecache_muzzy) > 0 || + (ecache_npages_get(&arena->pa_shard.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 86ac83e..1c18069 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3073,9 +3073,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->edata_cache.mtx); - MUTEX_PROF_RESET(arena->ecache_dirty.mtx); - MUTEX_PROF_RESET(arena->ecache_muzzy.mtx); - MUTEX_PROF_RESET(arena->ecache_retained.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index 54ac40b..d684388 100644 --- a/src/extent.c +++ b/src/extent.c @@ -686,11 +686,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, lead, true); + &arena->pa_shard.ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, trail, true); + &arena->pa_shard.ecache_retained, trail, true); } } else { /* @@ -703,12 +703,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, to_salvage, true); + &arena->pa_shard.ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, - &arena->ecache_retained, to_leak, true); + &arena->pa_shard.ecache_retained, to_leak, true); } goto label_err; } @@ -717,7 +717,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, edata, true); + &arena->pa_shard.ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -774,8 +774,8 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->ecache_retained, new_addr, size, alignment, slab, szind, - zero, commit, true); + &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, + szind, zero, commit, true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { @@ -974,7 +974,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &arena->ecache_dirty); + assert(ecache == &arena->pa_shard.ecache_dirty); /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { @@ -1076,8 +1076,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, - false); + extent_record(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, + edata, false); } void diff --git a/src/large.c b/src/large.c index 1899a46..24ff3be 100644 --- a/src/large.c +++ b/src/large.c @@ -119,19 +119,19 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; edata_t *trail; bool new_mapping; - if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail)) != NULL) { + if ((trail = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL + || (trail = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) + &arena->pa_shard.ecache_retained, edata_past_get(edata), + trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } -- cgit v0.12 From 8433ad84eaac3b7ecb6ee01256ccb5766708ae3a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 11:19:41 -0700 Subject: PA: move in shard initialization. --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 27 +-------------------------- src/pa.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4e73f10..d3f8514 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -19,4 +19,7 @@ struct pa_shard_s { ecache_t ecache_retained; }; +/* Returns true on error. */ +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index d9932b1..23f1988 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2027,32 +2027,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - /* - * Delay coalescing for dirty extents despite the disruptive effect on - * memory layout for best-fit extent allocation, since cached extents - * are likely to be reused soon after deallocation, and the cost of - * merging/splitting extents is non-trivial. - */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_dirty, extent_state_dirty, - ind, true)) { - goto label_error; - } - /* - * Coalesce muzzy extents immediately, because operations on them are in - * the critical path much less often than for dirty extents. - */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_muzzy, extent_state_muzzy, - ind, false)) { - goto label_error; - } - /* - * Coalesce retained extents immediately, in part because they will - * never be evicted (and therefore there's no opportunity for delayed - * coalescing), but also because operations on retained extents are not - * in the critical path. 
- */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_retained, - extent_state_retained, ind, false)) { + if (pa_shard_init(tsdn, &arena->pa_shard, ind)) { goto label_error; } diff --git a/src/pa.c b/src/pa.c index 3a26b39..620bf76 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,2 +1,35 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { + /* + * Delay coalescing for dirty extents despite the disruptive effect on + * memory layout for best-fit extent allocation, since cached extents + * are likely to be reused soon after deallocation, and the cost of + * merging/splitting extents is non-trivial. + */ + if (ecache_init(tsdn, &shard->ecache_dirty, extent_state_dirty, ind, + /* delay_coalesce */ true)) { + return true; + } + /* + * Coalesce muzzy extents immediately, because operations on them are in + * the critical path much less often than for dirty extents. + */ + if (ecache_init(tsdn, &shard->ecache_muzzy, extent_state_muzzy, ind, + /* delay_coalesce */ false)) { + return true; + } + /* + * Coalesce retained extents immediately, in part because they will + * never be evicted (and therefore there's no opportunity for delayed + * coalescing), but also because operations on retained extents are not + * in the critical path. + */ + if (ecache_init(tsdn, &shard->ecache_retained, extent_state_retained, + ind, /* delay_coalesce */ false)) { + return true; + } + return false; +} -- cgit v0.12 From 688fb3eb8959db178922476ffcfa5e94a82c1511 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 11:41:19 -0700 Subject: PA: Move in the arena edata_cache. --- include/jemalloc/internal/arena_structs.h | 3 --- include/jemalloc/internal/pa.h | 5 +++- src/arena.c | 16 +++++------- src/ctl.c | 2 +- src/extent.c | 42 +++++++++++++++---------------- src/extent_dss.c | 6 ++--- src/large.c | 11 ++++---- src/pa.c | 8 +++++- 8 files changed, 48 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 23fa424..dc4e326 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -166,9 +166,6 @@ struct arena_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; - /* The source of edata_t objects. */ - edata_cache_t edata_cache; - /* * bins is used to store heaps of free regions. * diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d3f8514..6bc5e33 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -17,9 +17,12 @@ struct pa_shard_s { ecache_t ecache_dirty; ecache_t ecache_muzzy; ecache_t ecache_retained; + + /* The source of edata_t objects. */ + edata_cache_t edata_cache; }; /* Returns true on error. 
*/ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 23f1988..55a64c7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -101,7 +101,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), + atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -228,7 +228,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.edata_cache.mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -2027,7 +2027,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, ind)) { + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind)) { goto label_error; } @@ -2044,10 +2044,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (edata_cache_init(&arena->edata_cache, base)) { - goto label_error; - } - /* Initialize bins. */ uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); @@ -2190,7 +2186,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - edata_cache_prefork(tsdn, &arena->edata_cache); + edata_cache_prefork(tsdn, &arena->pa_shard.edata_cache); } void @@ -2224,7 +2220,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - edata_cache_postfork_parent(tsdn, &arena->edata_cache); + edata_cache_postfork_parent(tsdn, &arena->pa_shard.edata_cache); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); @@ -2270,7 +2266,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - edata_cache_postfork_child(tsdn, &arena->edata_cache); + edata_cache_postfork_child(tsdn, &arena->pa_shard.edata_cache); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); diff --git a/src/ctl.c b/src/ctl.c index 1c18069..31277ae 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3072,7 +3072,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->edata_cache.mtx); + MUTEX_PROF_RESET(arena->pa_shard.edata_cache.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); diff --git a/src/extent.c b/src/extent.c index d684388..ae62070 100644 --- a/src/extent.c +++ b/src/extent.c @@ -157,8 +157,8 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, 
break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, - ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, + &arena->pa_shard.edata_cache, ehooks, ecache, edata)) { break; } /* @@ -212,7 +212,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } static void @@ -462,9 +462,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, - slab, growing_retained); + *edata = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, + ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, + szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -474,9 +474,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *edata, size, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, + ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, + false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -643,7 +643,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (edata == NULL) { goto label_err; } @@ -654,7 +654,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); goto label_err; } @@ -663,7 +663,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); goto label_err; } @@ -800,7 +800,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (edata == NULL) { return NULL; } @@ -808,14 +808,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } @@ -971,8 +971,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { - 
edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, - ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &arena->pa_shard.edata_cache, + ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->pa_shard.ecache_dirty); /* Always coalesce large extents eagerly. */ @@ -980,7 +980,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->edata_cache, ehooks, ecache, edata, + &arena->pa_shard.edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && @@ -1004,7 +1004,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, arena, ehooks, edata); @@ -1027,7 +1027,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } return err; @@ -1097,7 +1097,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } static bool diff --git a/src/extent_dss.c b/src/extent_dss.c index 9cf098e..d125c43 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = edata_cache_get(tsdn, &arena->edata_cache); + gap = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (gap == NULL) { return NULL; } @@ -189,7 +189,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_dalloc_gap(tsdn, arena, gap); } else { edata_cache_put(tsdn, - &arena->edata_cache, gap); + &arena->pa_shard.edata_cache, gap); } if (!*commit) { *commit = pages_decommit(ret, size); @@ -225,7 +225,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } label_oom: extent_dss_extending_finish(); - edata_cache_put(tsdn, &arena->edata_cache, gap); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); return NULL; } diff --git a/src/large.c b/src/large.c index 24ff3be..fa03a50 100644 --- a/src/large.c +++ b/src/large.c @@ -81,9 +81,10 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { /* Split excess pages. 
*/ if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, &arena->edata_cache, - ehooks, edata, usize + sz_large_pad, sz_size2index(usize), - false, diff, SC_NSIZES, false); + edata_t *trail = extent_split_wrapper(tsdn, + &arena->pa_shard.edata_cache, ehooks, edata, + usize + sz_large_pad, sz_size2index(usize), false, diff, + SC_NSIZES, false); if (trail == NULL) { return true; } @@ -140,8 +141,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } } - if (extent_merge_wrapper(tsdn, ehooks, &arena->edata_cache, edata, - trail)) { + if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, + edata, trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } diff --git a/src/pa.c b/src/pa.c index 620bf76..6db623b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -2,7 +2,9 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { + /* This will change eventually, but for now it should hold. */ + assert(base_ind_get(base) == ind); /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -31,5 +33,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { ind, /* delay_coalesce */ false)) { return true; } + if (edata_cache_init(&shard->edata_cache, base)) { + return true; + } + return false; } -- cgit v0.12 From 32cb7c2f0b4da21ed2b98b8fde7bba86309d1acd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 13:08:15 -0700 Subject: PA: Add a stats type. --- include/jemalloc/internal/arena_stats.h | 9 +++++++-- include/jemalloc/internal/pa.h | 14 +++++++++++++- src/arena.c | 8 +++++--- src/ctl.c | 7 ++++--- src/extent.c | 2 +- src/pa.c | 6 +++++- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 4166705..ab10361 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/pa.h" #include "jemalloc/internal/sc.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -112,8 +113,12 @@ struct arena_stats_s { arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ - /* VM space had to be leaked (undocumented). Normally 0. */ - atomic_zu_t abandoned_vm; + /* + * The stats logically owned by the pa_shard in the same arena. This + * lives here only because it's convenient for the purposes of the ctl + * module -- it only knows about the single arena_stats. + */ + pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 6bc5e33..890f7b1 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,11 +1,20 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" + /* * The page allocator; responsible for acquiring pages of memory for * allocations. */ +typedef struct pa_shard_stats_s pa_shard_stats_t; +struct pa_shard_stats_s { + /* VM space had to be leaked (undocumented). Normally 0. 
*/ + atomic_zu_t abandoned_vm; +}; + typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { /* @@ -20,9 +29,12 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; + + pa_shard_stats_t *stats; }; /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, + pa_shard_stats_t *stats); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 55a64c7..d03bc72 100644 --- a/src/arena.c +++ b/src/arena.c @@ -131,8 +131,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); - arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( - &arena->stats.abandoned_vm, ATOMIC_RELAXED)); + arena_stats_accum_zu(&astats->pa_shard_stats.abandoned_vm, + atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, + ATOMIC_RELAXED)); for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, @@ -2027,7 +2028,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind)) { + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + &arena->stats.pa_shard_stats)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 31277ae..1a9b0d9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -939,8 +939,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.nrequests_large); ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, &astats->astats.nflushes_large); - accum_atomic_zu(&sdstats->astats.abandoned_vm, - &astats->astats.abandoned_vm); + accum_atomic_zu(&sdstats->astats.pa_shard_stats.abandoned_vm, + &astats->astats.pa_shard_stats.abandoned_vm); accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); @@ -2962,7 +2962,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_resident, atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.abandoned_vm, + atomic_load_zu( + &arenas_i(mib[2])->astats->astats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, diff --git a/src/extent.c b/src/extent.c index ae62070..1b7f00f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -199,7 +199,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + arena_stats_accum_zu(&arena->pa_shard.stats->abandoned_vm, sz); } /* * Leak extent after making sure its pages have already been purged, so diff --git a/src/pa.c b/src/pa.c index 6db623b..516ae1d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -2,7 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, + pa_shard_stats_t *stats) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); /* @@ -37,5 +38,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { return true; } + shard->stats = stats; + memset(shard->stats, 0, sizeof(*shard->stats)); + return false; } -- cgit v0.12 From acd0bf6a2697d47fcfd868f76583c9d0a5974af1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 13:47:02 -0700 Subject: PA: move in ecache_grow. --- include/jemalloc/internal/arena_structs.h | 3 --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 18 +++++++----------- src/extent.c | 31 +++++++++++++++++-------------- src/pa.c | 4 ++++ test/unit/retained.c | 2 +- 6 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index dc4e326..ed16337 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -163,9 +163,6 @@ struct arena_s { arena_decay_t decay_dirty; /* dirty --> muzzy */ arena_decay_t decay_muzzy; /* muzzy --> retained */ - /* The grow info for the retained ecache. */ - ecache_grow_t ecache_grow; - /* * bins is used to store heaps of free regions. * diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 890f7b1..3b1a765 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -30,6 +30,9 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; + /* The grow info for the retained ecache. */ + ecache_grow_t ecache_grow; + pa_shard_stats_t *stats; }; diff --git a/src/arena.c b/src/arena.c index d03bc72..ced01d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1935,14 +1935,14 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, } } - malloc_mutex_lock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->ecache_grow.limit); + *old_limit = sz_pind2sz(arena->pa_shard.ecache_grow.limit); } if (new_limit != NULL) { - arena->ecache_grow.limit = new_ind; + arena->pa_shard.ecache_grow.limit = new_ind; } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); return false; } @@ -2042,10 +2042,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (ecache_grow_init(tsdn, &arena->ecache_grow)) { - goto label_error; - } - /* Initialize bins. 
*/ uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); @@ -2176,7 +2172,7 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - ecache_grow_prefork(tsdn, &arena->ecache_grow); + ecache_grow_prefork(tsdn, &arena->pa_shard.ecache_grow); } void @@ -2226,7 +2222,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_parent(tsdn, &arena->ecache_grow); + ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { @@ -2272,7 +2268,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_child(tsdn, &arena->ecache_grow); + ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { diff --git a/src/extent.c b/src/extent.c index 1b7f00f..7c00525 100644 --- a/src/extent.c +++ b/src/extent.c @@ -619,7 +619,7 @@ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); assert(!*zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -632,15 +632,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz( + arena->pa_shard.ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->ecache_grow.next + egn_skip >= + if (arena->pa_shard.ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz( + arena->pa_shard.ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); @@ -735,14 +737,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->ecache_grow.next + egn_skip + 1 <= - arena->ecache_grow.limit) { - arena->ecache_grow.next += egn_skip + 1; + if (arena->pa_shard.ecache_grow.next + egn_skip + 1 <= + arena->pa_shard.ecache_grow.limit) { + arena->pa_shard.ecache_grow.next += egn_skip + 1; } else { - arena->ecache_grow.next = arena->ecache_grow.limit; + arena->pa_shard.ecache_grow.next + = arena->pa_shard.ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ @@ -760,7 +763,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); return NULL; } @@ -771,13 +774,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &arena->pa_shard.ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -786,9 +789,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); return edata; } diff --git a/src/pa.c b/src/pa.c index 516ae1d..5063d48 100644 --- a/src/pa.c +++ b/src/pa.c @@ -38,6 +38,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + if (ecache_grow_init(tsdn, &shard->ecache_grow)) { + return true; + } + shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); diff --git a/test/unit/retained.c b/test/unit/retained.c index 9ac83ef..cf3de1e 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->ecache_grow.next; pind++) { + arena->pa_shard.ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; -- cgit v0.12 From 356aaa7dc65d554806287dfa1849a2d47be9b7a8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 20:43:41 -0700 Subject: Introduce lockedint module. This pulls out the various abstractions where some stats counter is sometimes an atomic, sometimes a plain variable, sometimes always protected by a lock, sometimes protected by reads but not writes, etc. With this change, these cases are treated consistently, and access patterns tagged. In the process, we fix a few missed-update bugs (where one caller assumes "protected-by-a-lock" semantics and another does not). 
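The pattern being unified is easier to see outside jemalloc's internals. Below is a minimal standalone C sketch, not jemalloc code: every name and the HAVE_64BIT_ATOMICS switch are assumptions chosen for illustration. Where 64-bit atomics exist the counter is a relaxed atomic and the mutex argument is ignored; otherwise the counter is a plain integer touched only under the mutex. The real lockedint helpers differ in one respect: they assert that the caller already holds the mutex, so one lock/unlock covers a whole batch of updates, whereas this sketch locks per operation for brevity.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#ifdef HAVE_64BIT_ATOMICS
#include <stdatomic.h>
/* Lock-free flavor: a relaxed atomic is enough for a stats counter. */
typedef struct { _Atomic uint64_t val; } counter_t;

static void counter_inc(counter_t *c, pthread_mutex_t *mtx, uint64_t x) {
	(void)mtx; /* unused in this flavor */
	atomic_fetch_add_explicit(&c->val, x, memory_order_relaxed);
}
static uint64_t counter_read(counter_t *c, pthread_mutex_t *mtx) {
	(void)mtx;
	return atomic_load_explicit(&c->val, memory_order_relaxed);
}
#else
/* Fallback flavor: a plain integer, always accessed under the mutex. */
typedef struct { uint64_t val; } counter_t;

static void counter_inc(counter_t *c, pthread_mutex_t *mtx, uint64_t x) {
	pthread_mutex_lock(mtx);
	c->val += x;
	pthread_mutex_unlock(mtx);
}
static uint64_t counter_read(counter_t *c, pthread_mutex_t *mtx) {
	pthread_mutex_lock(mtx);
	uint64_t v = c->val;
	pthread_mutex_unlock(mtx);
	return v;
}
#endif

int main(void) {
	pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
	counter_t nmalloc = {0};
	counter_inc(&nmalloc, &mtx, 1);
	counter_inc(&nmalloc, &mtx, 2);
	printf("nmalloc = %llu\n",
	    (unsigned long long)counter_read(&nmalloc, &mtx));
	return 0;
}

Either flavor presents the same increment/read interface, which is what lets the callers in the diff below be written once against locked_inc_u64()/locked_read_u64() instead of carrying the #ifdef at every call site.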
--- include/jemalloc/internal/arena_inlines_b.h | 12 +- include/jemalloc/internal/arena_stats.h | 179 +++++----------------------- include/jemalloc/internal/atomic.h | 26 +++- include/jemalloc/internal/lockedint.h | 151 +++++++++++++++++++++++ src/arena.c | 132 ++++++++++---------- src/ctl.c | 124 +++++++++---------- src/extent.c | 3 +- 7 files changed, 339 insertions(+), 288 deletions(-) create mode 100644 include/jemalloc/internal/lockedint.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index cadfc8f..5b33769 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -148,14 +148,14 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->decay_dirty.stats->nmadvise, 1); - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - extent_size); - arena_stats_unlock(tsdn, &arena->stats); + locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index ab10361..0a1ec73 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STATS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/pa.h" @@ -9,40 +10,28 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. - */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - typedef struct arena_stats_large_s arena_stats_large_t; struct arena_stats_large_s { /* * Total number of allocation/deallocation requests served directly by * the arena. */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; + locked_u64_t nmalloc; + locked_u64_t ndalloc; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - arena_stats_u64_t nrequests; /* Partially derived. */ + locked_u64_t nrequests; /* Partially derived. */ /* * Number of tcache fills / flushes for large (similarly, periodically * merged). Note that there is no large tcache batch-fill currently * (i.e. only fill 1 at a time); however flush may be batched. */ - arena_stats_u64_t nfills; /* Partially derived. */ - arena_stats_u64_t nflushes; /* Partially derived. */ + locked_u64_t nfills; /* Partially derived. */ + locked_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. 
*/ @@ -51,11 +40,11 @@ struct arena_stats_large_s { typedef struct arena_stats_decay_s arena_stats_decay_t; struct arena_stats_decay_s { /* Total number of purge sweeps. */ - arena_stats_u64_t npurge; + locked_u64_t npurge; /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; + locked_u64_t nmadvise; /* Total number of pages purged. */ - arena_stats_u64_t purged; + locked_u64_t purged; }; typedef struct arena_stats_extents_s arena_stats_extents_t; @@ -81,19 +70,19 @@ struct arena_stats_extents_s { */ typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif + LOCKEDINT_MTX_DECLARE(mtx) - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ + /* + * Number of bytes currently mapped, excluding retained memory. + */ + locked_zu_t mapped; /* Partially derived. */ /* * Number of unused virtual memory bytes currently retained. Retained * bytes are technically mapped (though always decommitted or purged), * but they are excluded from the mapped statistic (above). */ - atomic_zu_t retained; /* Derived. */ + locked_zu_t retained; /* Derived. */ /* Number of edata_t structs allocated by base, but not being used. */ atomic_zu_t edata_avail; @@ -107,11 +96,11 @@ struct arena_stats_s { atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nfills_large; /* Derived. */ - arena_stats_u64_t nflushes_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ + locked_u64_t nmalloc_large; /* Derived. */ + locked_u64_t ndalloc_large; /* Derived. */ + locked_u64_t nfills_large; /* Derived. */ + locked_u64_t nflushes_large; /* Derived. */ + locked_u64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. This @@ -139,138 +128,32 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { assert(((char *)arena_stats)[i] == 0); } } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + if (LOCKEDINT_MTX_INIT(LOCKEDINT_MTX(arena_stats->mtx), "arena_stats", WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } -#endif /* Memory is zeroed, so there is no need to clear stats. 
*/ return false; } static inline void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static inline void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static inline uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static inline void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -static inline void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static inline void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static inline size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static inline void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p, size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static inline void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p, size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static inline void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -static inline void arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); + LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; - arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); - arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); - arena_stats_unlock(tsdn, arena_stats); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &lstats->nrequests, nrequests); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &lstats->nflushes, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } static inline void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); + LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); + locked_inc_zu(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &arena_stats->mapped, size); + LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a76f54c..e5afb20 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -52,6 +52,20 @@ #define ATOMIC_SEQ_CST atomic_memory_order_seq_cst /* + * Another convenience -- simple atomic helper functions. + */ +#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, \ + lg_size) \ + JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ + ATOMIC_INLINE void \ + atomic_load_add_store_##short_type(atomic_##short_type##_t *a, \ + type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval + inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } + +/* * Not all platforms have 64-bit atomics. If we do, this #define exposes that * fact. */ @@ -67,18 +81,18 @@ JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) */ JEMALLOC_GENERATE_ATOMICS(bool, b, 0) -JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) -JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint8_t, u8, 0) -JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint32_t, u32, 2) #ifdef JEMALLOC_ATOMIC_U64 -JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint64_t, u64, 3) #endif #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h new file mode 100644 index 0000000..6a1f9ad --- /dev/null +++ b/include/jemalloc/internal/lockedint.h @@ -0,0 +1,151 @@ +#ifndef JEMALLOC_INTERNAL_LOCKEDINT_H +#define JEMALLOC_INTERNAL_LOCKEDINT_H + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. 
+ */ + +typedef struct locked_u64_s locked_u64_t; +#ifdef JEMALLOC_ATOMIC_U64 +struct locked_u64_s { + atomic_u64_t val; +}; +#else +/* Must hold the associated mutex. */ +struct locked_u64_s { + uint64_t val; +}; +#endif + +typedef struct locked_zu_s locked_zu_t; +struct locked_zu_s { + atomic_zu_t val; +}; + +#ifndef JEMALLOC_ATOMIC_U64 +# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; +# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) \ + malloc_mutex_init(ptr, name, rank, rank_mode) +# define LOCKEDINT_MTX(mtx) (&(mtx)) +# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +#else +# define LOCKEDINT_MTX_DECLARE(name) +# define LOCKEDINT_MTX(ptr) NULL +# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +#endif + +static inline uint64_t +locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(&p->val, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + return p->val; +#endif +} + +static inline void +locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + p->val += x; +#endif +} + +static inline void +locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, mtx); + p->val -= x; + assert(p->val + x >= p->val); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). + */ +static inline void +locked_inc_u64_unsynchronized(locked_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(&dst->val, ATOMIC_RELAXED); + atomic_store_u64(&dst->val, src + cur_dst, ATOMIC_RELAXED); +#else + dst->val += src; +#endif +} + +static inline uint64_t +locked_read_u64_unsynchronized(locked_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(&p->val, ATOMIC_RELAXED); +#else + return p->val; +#endif + +} + +static inline size_t +locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +#endif +} + +static inline void +locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); + atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, mtx); + size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); + atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ +static inline void +locked_inc_zu_unsynchronized(locked_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(&dst->val, ATOMIC_RELAXED); + atomic_store_zu(&dst->val, src + cur_dst, ATOMIC_RELAXED); +} + +/* + * Unlike the _u64 variant, this is safe to call unconditionally. + */ +static inline size_t +locked_read_atomic_zu(locked_zu_t *p) { + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +} + +#endif /* JEMALLOC_INTERNAL_LOCKEDINT_H */ diff --git a/src/arena.c b/src/arena.c index ced01d7..d4e200c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -93,80 +93,89 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - arena_stats_accum_zu(&astats->mapped, base_mapped - + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); - arena_stats_accum_zu(&astats->retained, + locked_inc_zu_unsynchronized(&astats->mapped, base_mapped + + locked_read_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped)); + locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - arena_stats_accum_u64(&astats->decay_dirty.npurge, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.npurge)); - arena_stats_accum_u64(&astats->decay_dirty.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.nmadvise)); - arena_stats_accum_u64(&astats->decay_dirty.purged, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.purged)); - arena_stats_accum_u64(&astats->decay_muzzy.npurge, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_muzzy.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.npurge)); - arena_stats_accum_u64(&astats->decay_muzzy.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_muzzy.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.nmadvise)); - arena_stats_accum_u64(&astats->decay_muzzy.purged, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_muzzy.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.purged)); - arena_stats_accum_zu(&astats->base, base_allocated); - arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); - arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); - arena_stats_accum_zu(&astats->resident, base_resident + + atomic_load_add_store_zu(&astats->base, base_allocated); + atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); + atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); + atomic_load_add_store_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << 
LG_PAGE))); - arena_stats_accum_zu(&astats->pa_shard_stats.abandoned_vm, + atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED)); for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { - uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nmalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nmalloc); - arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); - arena_stats_accum_u64(&astats->nmalloc_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); + locked_inc_u64_unsynchronized(&astats->nmalloc_large, + nmalloc); - uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t ndalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); - arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc); - arena_stats_accum_u64(&astats->ndalloc_large, ndalloc); + locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); + locked_inc_u64_unsynchronized(&astats->ndalloc_large, + ndalloc); - uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nrequests = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); - arena_stats_accum_u64(&lstats[i].nrequests, + locked_inc_u64_unsynchronized(&lstats[i].nrequests, nmalloc + nrequests); - arena_stats_accum_u64(&astats->nrequests_large, + locked_inc_u64_unsynchronized(&astats->nrequests_large, nmalloc + nrequests); /* nfill == nmalloc for large currently. */ - arena_stats_accum_u64(&lstats[i].nfills, nmalloc); - arena_stats_accum_u64(&astats->nfills_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc); + locked_inc_u64_unsynchronized(&astats->nfills_large, + nmalloc); - uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nflush = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nflushes); - arena_stats_accum_u64(&lstats[i].nflushes, nflush); - arena_stats_accum_u64(&astats->nflushes_large, nflush); + locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush); + locked_inc_u64_unsynchronized(&astats->nflushes_large, + nflush); assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - arena_stats_accum_zu(&astats->allocated_large, + atomic_load_add_store_zu(&astats->allocated_large, curlextents * sz_index2size(SC_NBINS + i)); } @@ -195,7 +204,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ATOMIC_RELAXED); } - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); /* tcache_bytes counts currently cached bytes. 
*/ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); @@ -204,13 +213,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; - arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, &tcache_bin_info[i]) - * sz_index2size(i)); + atomic_load_add_store_zu(&astats->tcache_bytes, + cache_bin_ncached_get(tbin, + &tcache_bin_info[i]) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; - arena_stats_accum_zu(&astats->tcache_bytes, + atomic_load_add_store_zu(&astats->tcache_bytes, cache_bin_ncached_get(tbin, &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i + SC_NBINS)); @@ -397,7 +406,7 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); } @@ -413,7 +422,7 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); } @@ -466,13 +475,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (edata != NULL) { if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); if (mapped_add != 0) { - arena_stats_add_zu(tsdn, &arena->stats, + locked_inc_zu(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.mapped, mapped_add); } - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_add(arena, esize >> LG_PAGE); } @@ -487,10 +497,10 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_dalloc_stats_update(tsdn, arena, edata_usize_get(edata)); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } @@ -502,9 +512,9 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t udiff = oldusize - usize; if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_sub(arena, udiff >> LG_PAGE); } @@ -516,9 +526,9 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t udiff = usize - oldusize; if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_add(arena, udiff >> LG_PAGE); } @@ -894,16 +904,16 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if 
(config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge, - 1); - arena_stats_add_u64(tsdn, &arena->stats, + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &decay->stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &decay->stats->nmadvise, nmadvise); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged, - npurged); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - nunmapped << LG_PAGE); - arena_stats_unlock(tsdn, &arena->stats); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &decay->stats->purged, npurged); + locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped, nunmapped << LG_PAGE); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } return npurged; diff --git a/src/ctl.c b/src/ctl.c index 1a9b0d9..56d3000 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -676,28 +676,19 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); - atomic_store_u64(dst, cur_dst + cur_src, ATOMIC_RELAXED); -#else - *dst += *src; -#endif +ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { + locked_inc_u64_unsynchronized(dst, + locked_read_u64_unsynchronized(src)); } -/* Likewise: with ctl mutex synchronization, reading is simple. */ -static uint64_t -ctl_arena_stats_read_u64(arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - return *p; -#endif +static void +ctl_accum_locked_zu(locked_zu_t *dst, locked_zu_t *src) { + locked_inc_zu_unsynchronized(dst, + locked_read_atomic_zu(src)); } static void -accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +ctl_accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -870,26 +861,26 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - accum_atomic_zu(&sdstats->astats.mapped, + ctl_accum_locked_zu(&sdstats->astats.mapped, &astats->astats.mapped); - accum_atomic_zu(&sdstats->astats.retained, + ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); - accum_atomic_zu(&sdstats->astats.edata_avail, + ctl_accum_atomic_zu(&sdstats->astats.edata_avail, &astats->astats.edata_avail); } - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.nmadvise, 
&astats->astats.decay_muzzy.nmadvise); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -900,13 +891,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { - accum_atomic_zu(&sdstats->astats.base, + ctl_accum_atomic_zu(&sdstats->astats.base, &astats->astats.base); - accum_atomic_zu(&sdstats->astats.internal, + ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); - accum_atomic_zu(&sdstats->astats.resident, + ctl_accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); - accum_atomic_zu(&sdstats->astats.metadata_thp, + ctl_accum_atomic_zu(&sdstats->astats.metadata_thp, &astats->astats.metadata_thp); } else { assert(atomic_load_zu( @@ -925,24 +916,25 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { - accum_atomic_zu(&sdstats->astats.allocated_large, + ctl_accum_atomic_zu(&sdstats->astats.allocated_large, &astats->astats.allocated_large); } else { assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_locked_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_locked_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_locked_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); - ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, + ctl_accum_locked_u64(&sdstats->astats.nflushes_large, &astats->astats.nflushes_large); - accum_atomic_zu(&sdstats->astats.pa_shard_stats.abandoned_vm, + ctl_accum_atomic_zu( + &sdstats->astats.pa_shard_stats.abandoned_vm, &astats->astats.pa_shard_stats.abandoned_vm); - accum_atomic_zu(&sdstats->astats.tcache_bytes, + ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); if (ctl_arena->arena_ind == 0) { @@ -978,11 +970,11 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge stats for large allocations. */ for (i = 0; i < SC_NSIZES - SC_NBINS; i++) { - ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + ctl_accum_locked_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_locked_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_locked_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -994,17 +986,17 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge extents stats. 
*/ for (i = 0; i < SC_NPSIZES; i++) { - accum_atomic_zu(&sdstats->estats[i].ndirty, + ctl_accum_atomic_zu(&sdstats->estats[i].ndirty, &astats->estats[i].ndirty); - accum_atomic_zu(&sdstats->estats[i].nmuzzy, + ctl_accum_atomic_zu(&sdstats->estats[i].nmuzzy, &astats->estats[i].nmuzzy); - accum_atomic_zu(&sdstats->estats[i].nretained, + ctl_accum_atomic_zu(&sdstats->estats[i].nretained, &astats->estats[i].nretained); - accum_atomic_zu(&sdstats->estats[i].dirty_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].dirty_bytes, &astats->estats[i].dirty_bytes); - accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, &astats->estats[i].muzzy_bytes); - accum_atomic_zu(&sdstats->estats[i].retained_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].retained_bytes, &astats->estats[i].retained_bytes); } } @@ -1104,10 +1096,10 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); - ctl_stats->mapped = atomic_load_zu( - &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED); - ctl_stats->retained = atomic_load_zu( - &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); + ctl_stats->mapped = locked_read_atomic_zu( + &ctl_sarena->astats->astats.mapped); + ctl_stats->retained = locked_read_atomic_zu( + &ctl_sarena->astats->astats.retained); ctl_background_thread_stats_read(tsdn); @@ -2916,10 +2908,10 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED), + locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.mapped), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), + locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, @@ -2927,23 +2919,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, @@ -2982,23 +2974,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, 
atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) /* * Note: "nmalloc_large" here instead of "nfills" in the read. This is * intentional (large has no batch fill). */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. */ @@ -3124,13 +3116,13 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) diff --git a/src/extent.c b/src/extent.c index 7c00525..a023d3e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -199,7 +199,8 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - arena_stats_accum_zu(&arena->pa_shard.stats->abandoned_vm, sz); + atomic_fetch_add_zu(&arena->pa_shard.stats->abandoned_vm, sz, + ATOMIC_RELAXED); } /* * Leak extent after making sure its pages have already been purged, so -- cgit v0.12 From 1ad368c8b7443881f40bc84cba87259f1892a8ce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 10:40:37 -0700 Subject: PA: Move in decay stats. --- include/jemalloc/internal/arena_inlines_b.h | 5 +-- include/jemalloc/internal/arena_stats.h | 13 -------- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/pa.h | 13 ++++++++ src/arena.c | 41 ++++++++++++++--------- src/ctl.c | 50 ++++++++++++++++++----------- 6 files changed, 73 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5b33769..eac4a63 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -150,9 +150,10 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, /* Update stats accordingly. 
*/ LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->decay_dirty.stats->nmadvise, 1); + &arena->pa_shard.stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); + &arena->pa_shard.stats->decay_dirty.purged, + extent_size >> LG_PAGE); locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.mapped, extent_size); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 0a1ec73..82996b8 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -37,16 +37,6 @@ struct arena_stats_large_s { size_t curlextents; /* Derived. */ }; -typedef struct arena_stats_decay_s arena_stats_decay_t; -struct arena_stats_decay_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. */ - locked_u64_t purged; -}; - typedef struct arena_stats_extents_s arena_stats_extents_t; struct arena_stats_extents_s { /* @@ -87,9 +77,6 @@ struct arena_stats_s { /* Number of edata_t structs allocated by base, but not being used. */ atomic_zu_t edata_avail; - arena_stats_decay_t decay_dirty; - arena_stats_decay_t decay_muzzy; - atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ed16337..7d31234 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -73,7 +73,7 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - arena_stats_decay_t *stats; + pa_shard_decay_stats_t *stats; /* Peak number of pages in associated extents. Used for debug only. */ uint64_t ceil_npages; }; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3b1a765..1b9e58c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -3,14 +3,27 @@ #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/lockedint.h" /* * The page allocator; responsible for acquiring pages of memory for * allocations. */ +typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; +struct pa_shard_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. */ + locked_u64_t purged; +}; + typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { + pa_shard_decay_stats_t decay_dirty; + pa_shard_decay_stats_t decay_muzzy; /* VM space had to be leaked (undocumented). Normally 0. 
*/ atomic_zu_t abandoned_vm; }; diff --git a/src/arena.c b/src/arena.c index d4e200c..a29dc93 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,25 +105,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - locked_inc_u64_unsynchronized(&astats->decay_dirty.npurge, + /* Dirty pa_shard_decay_stats_t */ + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.npurge)); - locked_inc_u64_unsynchronized(&astats->decay_dirty.nmadvise, + &arena->pa_shard.stats->decay_dirty.npurge)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.nmadvise)); - locked_inc_u64_unsynchronized(&astats->decay_dirty.purged, + &arena->pa_shard.stats->decay_dirty.nmadvise)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.purged)); + &arena->pa_shard.stats->decay_dirty.purged)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.npurge, + /* Muzzy pa_shard_decay_stats_t */ + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.npurge)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.nmadvise, + &arena->pa_shard.stats->decay_muzzy.npurge)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.nmadvise)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.purged, + &arena->pa_shard.stats->decay_muzzy.nmadvise)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.purged)); + &arena->pa_shard.stats->decay_muzzy.purged)); atomic_load_add_store_zu(&astats->base, base_allocated); atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); @@ -695,7 +703,7 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { static bool arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, - arena_stats_decay_t *stats) { + pa_shard_decay_stats_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -708,7 +716,6 @@ arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, } decay->purging = false; arena_decay_reinit(decay, decay_ms); - /* Memory is zeroed, so there is no need to clear stats. 
*/ if (config_stats) { decay->stats = stats; } @@ -2044,11 +2051,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (arena_decay_init(&arena->decay_dirty, - arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { + arena_dirty_decay_ms_default_get(), + &arena->pa_shard.stats->decay_dirty)) { goto label_error; } if (arena_decay_init(&arena->decay_muzzy, - arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { + arena_muzzy_decay_ms_default_get(), + &arena->pa_shard.stats->decay_muzzy)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 56d3000..26d86da 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -869,19 +869,25 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.edata_avail); } - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.npurge, - &astats->astats.decay_dirty.npurge); - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.nmadvise, - &astats->astats.decay_dirty.nmadvise); - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.purged, - &astats->astats.decay_dirty.purged); - - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.npurge, - &astats->astats.decay_muzzy.npurge); - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.nmadvise, - &astats->astats.decay_muzzy.nmadvise); - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.purged, - &astats->astats.decay_muzzy.purged); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.npurge, + &astats->astats.pa_shard_stats.decay_dirty.npurge); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.decay_dirty.nmadvise); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.purged, + &astats->astats.pa_shard_stats.decay_dirty.purged); + + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.npurge, + &astats->astats.pa_shard_stats.decay_muzzy.npurge); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.decay_muzzy.nmadvise); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.purged, + &astats->astats.pa_shard_stats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ &(sdstats->astats.mutex_prof_data[ \ @@ -2920,23 +2926,29 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.npurge), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.nmadvise), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.purged), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.npurge), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.nmadvise), + uint64_t) 
CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.purged), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), -- cgit v0.12 From 1ada4aef84246d3fc494d8064ee14d5ae62ec569 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 10:52:26 -0700 Subject: PA: Get rid of arena_ind_get calls. This is another step on the path towards breaking the extent reliance on the arena module. --- src/extent.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/extent.c b/src/extent.c index a023d3e..5106264 100644 --- a/src/extent.c +++ b/src/extent.c @@ -390,7 +390,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); - if (edata_arena_ind_get(edata) != arena_ind_get(arena) + if (edata_arena_ind_get(edata) != ecache_ind_get(ecache) || edata_size_get(edata) < size || edata_state_get(edata) != ecache->state) { @@ -661,9 +661,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_err; } - edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); + edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, + alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), + extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); @@ -815,9 +815,9 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } - edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, - arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true, EXTENT_NOT_HEAD); + edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, + size, slab, szind, arena_extent_sn_next(arena), extent_state_active, + *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; -- cgit v0.12 From ce8c0d6c09e744f52f2ce01b93c77d9acf0cf1a8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 11:10:43 -0700 Subject: PA: Move in arena extent_sn counter. Just another step towards making PA self-contained. 
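As a standalone illustration of the counter this commit relocates (the struct and function names here are assumptions, not jemalloc's; only the relaxed fetch-add mirrors pa_shard_extent_sn_next() in the diff below), a per-shard extent serial number generator is just:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
	_Atomic size_t extent_sn_next; /* next serial number to hand out */
} shard_t;

static size_t shard_extent_sn_next(shard_t *s) {
	/*
	 * Relaxed ordering suffices: callers only need distinct, increasing
	 * values, not synchronization with any other memory.
	 */
	return atomic_fetch_add_explicit(&s->extent_sn_next, 1,
	    memory_order_relaxed);
}

int main(void) {
	shard_t shard = { .extent_sn_next = 0 };
	for (int i = 0; i < 3; i++) {
		printf("sn = %zu\n", shard_extent_sn_next(&shard));
	}
	return 0;
}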
--- include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/arena_structs.h | 7 ------- include/jemalloc/internal/pa.h | 4 ++++ src/arena.c | 7 ------- src/extent.c | 9 +++++---- src/extent_dss.c | 6 +++--- src/pa.c | 7 +++++++ 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4ef8d8e..8548b1f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -81,7 +81,6 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 7d31234..ca11af7 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -122,13 +122,6 @@ struct arena_s { counter_accum_t prof_accum; /* - * Extent serial number generator state. - * - * Synchronization: atomic. - */ - atomic_zu_t extent_sn_next; - - /* * Represents a dss_prec_t, but atomically. * * Synchronization: atomic. diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1b9e58c..29c6b21 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -46,11 +46,15 @@ struct pa_shard_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; + /* Extent serial number generator state. */ + atomic_zu_t extent_sn_next; + pa_shard_stats_t *stats; }; /* Returns true on error. 
*/ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats); +size_t pa_shard_extent_sn_next(pa_shard_t *shard); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index a29dc93..8f30660 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1979,11 +1979,6 @@ arena_nthreads_dec(arena_t *arena, bool internal) { atomic_fetch_sub_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED); } -size_t -arena_extent_sn_next(arena_t *arena) { - return atomic_fetch_add_zu(&arena->extent_sn_next, 1, ATOMIC_RELAXED); -} - arena_t * arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; @@ -2032,8 +2027,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); diff --git a/src/extent.c b/src/extent.c index 5106264..918738d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -662,8 +662,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); + alloc_size, false, SC_NSIZES, + pa_shard_extent_sn_next(&arena->pa_shard), extent_state_active, + zeroed, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); @@ -816,8 +817,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, - size, slab, szind, arena_extent_sn_next(arena), extent_state_active, - *zero, *commit, true, EXTENT_NOT_HEAD); + size, slab, szind, pa_shard_extent_sn_next(&arena->pa_shard), + extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; diff --git a/src/extent_dss.c b/src/extent_dss.c index d125c43..7746a20 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -155,9 +155,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true, true, - EXTENT_NOT_HEAD); + SC_NSIZES, pa_shard_extent_sn_next( + &arena->pa_shard), extent_state_active, + false, true, true, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired diff --git a/src/pa.c b/src/pa.c index 5063d48..35d3335 100644 --- a/src/pa.c +++ b/src/pa.c @@ -42,8 +42,15 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); return false; } + +size_t +pa_shard_extent_sn_next(pa_shard_t *shard) { + return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); +} -- cgit v0.12 From 6ca918d0cfe54587376282ec85edf153c2ea0d5b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 11:26:15 -0700 Subject: PA: Add a stats comment. 
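The layout that the new comment documents can be summarized with a small sketch: the shard's statistics live inside the arena-wide stats object that the ctl epoch merge already walks, and the shard itself keeps only a pointer to its slice rather than embedding the counters directly. The struct names below echo the real ones, but the fields are pared down to a couple of placeholders for illustration.

#include <stddef.h>

typedef struct {
	size_t mapped;
	size_t abandoned_vm;
} shard_stats_t;

typedef struct {
	size_t resident;        /* arena-level fields the ctl code merges... */
	shard_stats_t pa_stats; /* ...plus the shard's slice, merged alongside */
} arena_stats_t;

typedef struct {
	shard_stats_t *stats;   /* points into the owning arena_stats_t */
} shard_t;

static void shard_init(shard_t *shard, arena_stats_t *astats) {
	shard->stats = &astats->pa_stats;
}

int main(void) {
	arena_stats_t astats = {0};
	shard_t shard;
	shard_init(&shard, &astats);
	shard.stats->mapped += 4096; /* updates land in the arena's stats */
	return 0;
}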
--- include/jemalloc/internal/pa.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 29c6b21..a7c5789 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -10,6 +10,17 @@ * allocations. */ +/* + * The stats for a particular pa_shard. Because of the way the ctl module + * handles stats epoch data collection (it has its own arena_stats, and merges + * the stats from each arena into it), this needs to live in the arena_stats_t; + * hence we define it here and let the pa_shard have a pointer (rather than the + * more natural approach of just embedding it in the pa_shard itself). + * + * We follow the arena_stats_t approach of marking the derived fields. These + * are the ones that are not maintained on their own; instead, their values are + * derived during those stats merges. + */ typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; struct pa_shard_decay_stats_s { /* Total number of purge sweeps. */ -- cgit v0.12 From 70d12ffa055518326573c985cbc86a32a1f2de1d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:06:19 -0700 Subject: PA: Move mapped into pa stats. --- include/jemalloc/internal/arena_inlines_b.h | 15 +++++++++------ include/jemalloc/internal/arena_stats.h | 14 -------------- include/jemalloc/internal/pa.h | 19 ++++++++++++++++++- src/arena.c | 15 ++++++++------- src/ctl.c | 11 ++++++----- src/large.c | 2 +- src/pa.c | 3 ++- 7 files changed, 44 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index eac4a63..fd64175 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -148,15 +148,18 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), &arena->pa_shard.stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), &arena->pa_shard.stats->decay_dirty.purged, extent_size >> LG_PAGE); - locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); + locked_dec_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 82996b8..129a8fe 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -61,12 +61,6 @@ struct arena_stats_extents_s { typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { LOCKEDINT_MTX_DECLARE(mtx) - - /* - * Number of bytes currently mapped, excluding retained memory. - */ - locked_zu_t mapped; /* Partially derived. */ - /* * Number of unused virtual memory bytes currently retained. 
Retained * bytes are technically mapped (though always decommitted or purged), @@ -135,12 +129,4 @@ arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } -static inline void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); - locked_inc_zu(tsdn, LOCKEDINT_MTX(arena_stats->mtx), - &arena_stats->mapped, size); - LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); -} - #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a7c5789..61b6f42 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -35,6 +35,14 @@ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; pa_shard_decay_stats_t decay_muzzy; + /* + * Number of bytes currently mapped, excluding retained memory. + * + * Partially derived -- we maintain our own counter, but add in the + * base's own counter at merge. + */ + locked_zu_t mapped; + /* VM space had to be leaked (undocumented). Normally 0. */ atomic_zu_t abandoned_vm; }; @@ -60,12 +68,21 @@ struct pa_shard_s { /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; }; +static inline void +pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + locked_inc_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, size); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); +} + /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats); + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 8f30660..2f626fe 100644 --- a/src/arena.c +++ b/src/arena.c @@ -95,9 +95,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->mapped, base_mapped - + locked_read_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped)); + locked_inc_zu_unsynchronized(&astats->pa_shard_stats.mapped, + base_mapped + locked_read_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped)); locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); @@ -488,7 +489,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (mapped_add != 0) { locked_inc_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, mapped_add); + &arena->pa_shard.stats->mapped, mapped_add); } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -919,7 +920,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &decay->stats->purged, npurged); locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, nunmapped << LG_PAGE); + &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -1240,7 +1241,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, true, szind, &zero); if (config_stats && slab != NULL) { - arena_stats_mapped_add(tsdn, &arena->stats, + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, bin_info->slab_size); 
} @@ -2039,7 +2040,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, - &arena->stats.pa_shard_stats)) { + &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx))) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 26d86da..122856c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -861,8 +861,9 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_locked_zu(&sdstats->astats.mapped, - &astats->astats.mapped); + ctl_accum_locked_zu( + &sdstats->astats.pa_shard_stats.mapped, + &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); ctl_accum_atomic_zu(&sdstats->astats.edata_avail, @@ -1103,7 +1104,7 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = locked_read_atomic_zu( - &ctl_sarena->astats->astats.mapped); + &ctl_sarena->astats->astats.pa_shard_stats.mapped); ctl_stats->retained = locked_read_atomic_zu( &ctl_sarena->astats->astats.retained); @@ -2914,8 +2915,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.mapped), - size_t) + locked_read_atomic_zu(&arenas_i( + mib[2])->astats->astats.pa_shard_stats.mapped), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) diff --git a/src/large.c b/src/large.c index fa03a50..57bf674 100644 --- a/src/large.c +++ b/src/large.c @@ -151,7 +151,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, emap_remap(tsdn, &emap_global, edata, szind, false); if (config_stats && new_mapping) { - arena_stats_mapped_add(tsdn, &arena->stats, trailsize); + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, trailsize); } if (zero) { diff --git a/src/pa.c b/src/pa.c index 35d3335..e4dbb04 100644 --- a/src/pa.c +++ b/src/pa.c @@ -3,7 +3,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats) { + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -44,6 +44,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + shard->stats_mtx = stats_mtx; shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); -- cgit v0.12 From 22a0a7b93a192a07e9a3e5ba9f5adfa64036219e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:14:51 -0700 Subject: Move arena_decay_extent to extent module. 
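Both pa_shard_stats_mapped_add above and the purge helper relocated below follow the same lock/update/unlock shape around the shard's stats mutex. A self-contained sketch of that shape with plain pthreads; the toy_* names stand in for the locked_*/LOCKEDINT machinery, and the toy counts everything in pages:

#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t mtx;	/* stands in for *stats_mtx */
	size_t mapped;		/* stands in for stats->mapped */
	size_t purged;		/* stands in for decay_dirty.purged */
} toy_stats_t;

static void
toy_stats_purge_record(toy_stats_t *stats, size_t npages) {
	/* Counters are read and written only while the mutex is held. */
	pthread_mutex_lock(&stats->mtx);
	stats->purged += npages;
	stats->mapped -= npages;
	pthread_mutex_unlock(&stats->mtx);
}

int
main(void) {
	toy_stats_t stats = {PTHREAD_MUTEX_INITIALIZER, 8, 0};
	toy_stats_purge_record(&stats, 8);
	printf("mapped=%zu purged=%zu\n", stats.mapped, stats.purged);
	return 0;
}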
--- include/jemalloc/internal/arena_inlines_b.h | 23 ----------------------- src/extent.c | 26 +++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fd64175..50223ba 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -140,29 +140,6 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { arena_decay_ticks(tsdn, arena, 1); } -/* Purge a single extent to retained / unmapped directly. */ -JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { - size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); - if (config_stats) { - /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.purged, - extent_size >> LG_PAGE); - locked_dec_zu(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); - } -} - JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { diff --git a/src/extent.c b/src/extent.c index 918738d..8411e8a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -954,6 +954,30 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, edata, coalesced, growing_retained, true); } +/* Purge a single extent to retained / unmapped directly. */ +static void +extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { + size_t extent_size = edata_size_get(edata); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + if (config_stats) { + /* Update stats accordingly. */ + LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->decay_dirty.nmadvise, 1); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->decay_dirty.purged, + extent_size >> LG_PAGE); + locked_dec_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); + } +} + + /* * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces, deregisters slab interiors, the heap operations). @@ -992,7 +1016,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, arena_may_force_decay(arena)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); - arena_decay_extent(tsdn, arena, ehooks, edata); + extent_maximally_purge(tsdn, arena, ehooks, edata); return; } } -- cgit v0.12 From 3192d6b77dae3b4aa36b95eea793fcdea6f5ffbd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:20:06 -0700 Subject: Extents: Have extent_dalloc_gap take ehooks. We're almost to the point where the extent code doesn't know about arenas at all. In that world, we shouldn't pull them out of the arena. 
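The change below is purely about who supplies the hooks: instead of extent_dalloc_gap fetching ehooks from the arena, the caller now passes them in. A schematic before/after of that calling convention, using placeholder toy_* types rather than the real signatures:

#include <stdio.h>

typedef struct { const char *name; } toy_ehooks_t;
typedef struct { toy_ehooks_t hooks; } toy_arena_t;
typedef struct { int unused; } toy_edata_t;

static toy_ehooks_t *
toy_arena_get_ehooks(toy_arena_t *arena) {
	return &arena->hooks;
}

/* Before: the callee reaches back into the arena for its hooks. */
static void
toy_dalloc_gap_old(toy_arena_t *arena, toy_edata_t *edata) {
	toy_ehooks_t *ehooks = toy_arena_get_ehooks(arena);
	(void)edata;
	printf("dalloc via %s\n", ehooks->name);
}

/* After: the caller hands the hooks in; the callee needs no arena. */
static void
toy_dalloc_gap_new(toy_ehooks_t *ehooks, toy_edata_t *edata) {
	(void)edata;
	printf("dalloc via %s\n", ehooks->name);
}

int
main(void) {
	toy_arena_t arena = {{"default hooks"}};
	toy_edata_t gap = {0};
	toy_dalloc_gap_old(&arena, &gap);
	toy_dalloc_gap_new(toy_arena_get_ehooks(&arena), &gap);
	return 0;
}

Passing the hooks explicitly is what will eventually let the extent code forget about arenas entirely, as the commit message says.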
--- include/jemalloc/internal/extent.h | 3 ++- src/extent.c | 5 ++--- src/extent_dss.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index e615fb6..bb01254 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -33,7 +33,8 @@ edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index 8411e8a..0162494 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1026,9 +1026,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - ehooks_t *ehooks = arena_get_ehooks(arena); - +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/extent_dss.c b/src/extent_dss.c index 7746a20..55f037e 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -186,7 +186,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_dss_extending_finish(); if (gap_size_page != 0) { - extent_dalloc_gap(tsdn, arena, gap); + ehooks_t *ehooks = arena_get_ehooks( + arena); + extent_dalloc_gap(tsdn, arena, ehooks, + gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); -- cgit v0.12 From 497836dbc8bd5badb0726a36fb5ce12779b15c6b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 13:19:09 -0700 Subject: Arena stats: mark edata_avail as derived. The true number is in the edata_cache itself. --- include/jemalloc/internal/arena_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 129a8fe..310b907 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -69,7 +69,7 @@ struct arena_stats_s { locked_zu_t retained; /* Derived. */ /* Number of edata_t structs allocated by base, but not being used. */ - atomic_zu_t edata_avail; + atomic_zu_t edata_avail; /* Derived. */ atomic_zu_t base; /* Derived. 
*/ atomic_zu_t internal; -- cgit v0.12 From 7b6288547637124088ef208fe667037b70bd3e01 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 13:11:35 -0700 Subject: Introduce decay module and put decay objects in PA --- include/jemalloc/internal/arena_inlines_b.h | 4 +- include/jemalloc/internal/arena_structs.h | 70 --------- .../jemalloc/internal/background_thread_externs.h | 2 +- .../jemalloc/internal/background_thread_inlines.h | 2 +- include/jemalloc/internal/decay.h | 66 +++++++++ include/jemalloc/internal/pa.h | 30 ++-- src/arena.c | 158 +++++++++++---------- src/background_thread.c | 18 +-- src/ctl.c | 4 +- 9 files changed, 181 insertions(+), 173 deletions(-) create mode 100644 include/jemalloc/internal/decay.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 50223ba..8b77a33 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -134,8 +134,8 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_muzzy.mtx); arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ca11af7..49568fc 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -15,69 +15,8 @@ #include "jemalloc/internal/pa.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/ticker.h" -struct arena_decay_s { - /* Synchronizes all non-atomic fields. */ - malloc_mutex_t mtx; - /* - * True if a thread is currently purging the extents associated with - * this decay structure. - */ - bool purging; - /* - * Approximate time in milliseconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - atomic_zd_t time_ms; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). - * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of unpurged pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay_*.nunpurged and - * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, - * if any, were generated. 
- */ - size_t nunpurged; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; - - /* - * Pointer to associated stats. These stats are embedded directly in - * the arena's stats due to how stats structures are shared between the - * arena and ctl code. - * - * Synchronization: Same as associated arena's stats field. */ - pa_shard_decay_stats_t *stats; - /* Peak number of pages in associated extents. Used for debug only. */ - uint64_t ceil_npages; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -148,15 +87,6 @@ struct arena_s { pa_shard_t pa_shard; /* - * Decay-based purging state, responsible for scheduling extent state - * transitions. - * - * Synchronization: internal. - */ - arena_decay_t decay_dirty; /* dirty --> muzzy */ - arena_decay_t decay_muzzy; /* muzzy --> retained */ - - /* * bins is used to store heaps of free regions. * * Synchronization: internal. diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 224e370..d5c1369 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,7 +13,7 @@ bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, size_t npages_new); + decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index f85e86f..7bdbe92 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -55,7 +55,7 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, arena_background_thread_info_get(arena); if (background_thread_indefinite_sleep(info)) { background_thread_interval_check(tsdn, arena, - &arena->decay_dirty, 0); + &arena->pa_shard.decay_dirty, 0); } } diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h new file mode 100644 index 0000000..28fe54d --- /dev/null +++ b/include/jemalloc/internal/decay.h @@ -0,0 +1,66 @@ +#ifndef JEMALLOC_INTERNAL_DECAY_H +#define JEMALLOC_INTERNAL_DECAY_H + +#include "jemalloc/internal/smoothstep.h" + +/* + * The decay_t computes the number of pages we should purge at any given time. + * Page allocators inform a decay object when pages enter a decay-able state + * (i.e. dirty or muzzy), and query it to determine how many pages should be + * purged at any given time. + */ +typedef struct decay_s decay_t; +struct decay_s { + /* Synchronizes all non-atomic fields. */ + malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; + /* + * Approximate time in milliseconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + atomic_zd_t time_ms; + /* time / SMOOTHSTEP_NSTEPS. 
*/ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of unpurged pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, + * if any, were generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; +}; + +#endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 61b6f42..d686652 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/lockedint.h" @@ -10,6 +11,16 @@ * allocations. */ +typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; +struct pa_shard_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. */ + locked_u64_t purged; +}; + /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -21,16 +32,6 @@ * are the ones that are not maintained on their own; instead, their values are * derived during those stats merges. */ -typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; -struct pa_shard_decay_stats_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. */ - locked_u64_t purged; -}; - typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; @@ -70,6 +71,15 @@ struct pa_shard_s { malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: internal. + */ + decay_t decay_dirty; /* dirty --> muzzy */ + decay_t decay_muzzy; /* muzzy --> retained */ }; static inline void diff --git a/src/arena.c b/src/arena.c index 2f626fe..ce0b57c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,9 +55,9 @@ static unsigned huge_arena_ind; * definition. 
*/ -static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, ecache_t *ecache, bool all, size_t npages_limit, - size_t npages_decay_max, bool is_background_thread); +static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, + size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, @@ -106,7 +106,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - /* Dirty pa_shard_decay_stats_t */ + /* Dirty decay stats */ locked_inc_u64_unsynchronized( &astats->pa_shard_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), @@ -120,7 +120,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->decay_dirty.purged)); - /* Muzzy pa_shard_decay_stats_t */ + /* Decay stats */ locked_inc_u64_unsynchronized( &astats->pa_shard_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), @@ -255,9 +255,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_prof_mutex_extents_muzzy) READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, arena_prof_mutex_extents_retained) - READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_dirty.mtx, arena_prof_mutex_decay_dirty) - READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy) READ_ARENA_MUTEX_PROF_DATA(base->mtx, arena_prof_mutex_base) @@ -543,17 +543,17 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } static ssize_t -arena_decay_ms_read(arena_decay_t *decay) { +arena_decay_ms_read(decay_t *decay) { return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); } static void -arena_decay_ms_write(arena_decay_t *decay, ssize_t decay_ms) { +arena_decay_ms_write(decay_t *decay, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); } static void -arena_decay_deadline_init(arena_decay_t *decay) { +arena_decay_deadline_init(decay_t *decay) { /* * Generate a new deadline that is uniformly random within the next * epoch after the current one. @@ -570,12 +570,12 @@ arena_decay_deadline_init(arena_decay_t *decay) { } static bool -arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { +arena_decay_deadline_reached(const decay_t *decay, const nstime_t *time) { return (nstime_compare(&decay->deadline, time) <= 0); } static size_t -arena_decay_backlog_npages_limit(const arena_decay_t *decay) { +arena_decay_backlog_npages_limit(const decay_t *decay) { uint64_t sum; size_t npages_limit_backlog; unsigned i; @@ -595,7 +595,7 @@ arena_decay_backlog_npages_limit(const arena_decay_t *decay) { } static void -arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { +arena_decay_backlog_update_last(decay_t *decay, size_t current_npages) { size_t npages_delta = (current_npages > decay->nunpurged) ? 
current_npages - decay->nunpurged : 0; decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; @@ -613,7 +613,7 @@ arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { } static void -arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, +arena_decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * @@ -635,18 +635,18 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, } static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, size_t current_npages, size_t npages_limit, - bool is_background_thread) { +arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, ecache, false, - npages_limit, current_npages - npages_limit, + arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, + false, npages_limit, current_npages - npages_limit, is_background_thread); } } static void -arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, +arena_decay_epoch_advance_helper(decay_t *decay, const nstime_t *time, size_t current_npages) { assert(arena_decay_deadline_reached(decay, time)); @@ -670,8 +670,9 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, } static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, const nstime_t *time, bool is_background_thread) { +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, const nstime_t *time, + bool is_background_thread) { size_t current_npages = ecache_npages_get(ecache); arena_decay_epoch_advance_helper(decay, time, current_npages); @@ -681,13 +682,13 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, ecache, + arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, current_npages, npages_limit, is_background_thread); } } static void -arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { +arena_decay_reinit(decay_t *decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -703,10 +704,9 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, - pa_shard_decay_stats_t *stats) { +arena_decay_init(decay_t *decay, ssize_t decay_ms) { if (config_debug) { - for (size_t i = 0; i < sizeof(arena_decay_t); i++) { + for (size_t i = 0; i < sizeof(decay_t); i++) { assert(((char *)decay)[i] == 0); } decay->ceil_npages = 0; @@ -717,9 +717,6 @@ arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, } decay->purging = false; arena_decay_reinit(decay, decay_ms); - if (config_stats) { - decay->stats = stats; - } return false; } @@ -736,16 +733,17 @@ arena_decay_ms_valid(ssize_t decay_ms) { } static bool -arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool is_background_thread) { +arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t 
*ecache, + bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, ecache, false, - 0, ecache_npages_get(ecache), + arena_decay_to_limit(tsdn, arena, decay, decay_stats, + ecache, false, 0, ecache_npages_get(ecache), is_background_thread); } return false; @@ -780,10 +778,10 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, ecache, &time, - is_background_thread); + arena_decay_epoch_advance(tsdn, arena, decay, decay_stats, + ecache, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, ecache, + arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, ecache_npages_get(ecache), arena_decay_backlog_npages_limit(decay), is_background_thread); @@ -793,23 +791,23 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static ssize_t -arena_decay_ms_get(arena_decay_t *decay) { +arena_decay_ms_get(decay_t *decay) { return arena_decay_ms_read(decay); } ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_dirty); + return arena_decay_ms_get(&arena->pa_shard.decay_dirty); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_muzzy); + return arena_decay_ms_get(&arena->pa_shard.decay_muzzy); } static bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, ssize_t decay_ms) { +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -824,7 +822,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. 
*/ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, ecache, false); + arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -833,15 +831,17 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty, decay_ms); + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, + &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, + decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy, decay_ms); + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, + &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, + decay_ms); } static size_t @@ -865,8 +865,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, ecache_t *ecache, bool all, - edata_list_t *decay_extents, bool is_background_thread) { + decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + bool all, edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -914,11 +914,11 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->npurge, 1); + &decay_stats->npurge, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->nmadvise, nmadvise); + &decay_stats->nmadvise, nmadvise); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->purged, npurged); + &decay_stats->purged, npurged); locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -935,9 +935,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * current decay run, so that the purging thread never finishes. 
*/ static void -arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, - bool is_background_thread) { +arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, + size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); @@ -957,7 +957,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - ecache, all, &decay_extents, is_background_thread); + decay_stats, ecache, all, &decay_extents, + is_background_thread); assert(npurged == npurge); } @@ -966,12 +967,13 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static bool -arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool is_background_thread, bool all) { +arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, ecache, all, 0, - ecache_npages_get(ecache), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, + all, 0, ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -982,8 +984,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, ecache, - is_background_thread); + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, decay_stats, + ecache, is_background_thread); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ @@ -1003,8 +1005,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty, is_background_thread, all); + return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, + &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, + is_background_thread, all); } static bool @@ -1014,8 +1017,9 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, arena_muzzy_decay_ms_get(arena) <= 0) { return false; } - return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy, is_background_thread, all); + return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, + &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, + is_background_thread, all); } void @@ -2044,14 +2048,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, - arena_dirty_decay_ms_default_get(), - &arena->pa_shard.stats->decay_dirty)) { + if (arena_decay_init(&arena->pa_shard.decay_dirty, + arena_dirty_decay_ms_default_get())) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, - arena_muzzy_decay_ms_default_get(), - &arena->pa_shard.stats->decay_muzzy)) { + if (arena_decay_init(&arena->pa_shard.decay_muzzy, + arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -2172,8 +2174,8 @@ arena_boot(sc_data_t *sc_data) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_muzzy.mtx); } void @@ -2236,8 +2238,8 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_muzzy.mtx); if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -2282,8 +2284,8 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_muzzy.mtx); if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } diff --git a/src/background_thread.c b/src/background_thread.c index ddfe3a3..95a8b16 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -55,7 +55,7 @@ bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t 
*decay, size_t npages_new) NOT_REACHED + decay_t *decay, size_t npages_new) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED @@ -99,7 +99,7 @@ set_current_thread_affinity(int cpu) { #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) static inline size_t -decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { +decay_npurge_after_interval(decay_t *decay, size_t interval) { size_t i; uint64_t sum = 0; for (i = 0; i < interval; i++) { @@ -113,7 +113,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { } static uint64_t -arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, +arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, ecache_t *ecache) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. */ @@ -201,13 +201,13 @@ label_done: static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; - i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty); + i1 = arena_decay_compute_purge_interval_impl(tsdn, + &arena->pa_shard.decay_dirty, &arena->pa_shard.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } - i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy); + i2 = arena_decay_compute_purge_interval_impl(tsdn, + &arena->pa_shard.decay_muzzy, &arena->pa_shard.ecache_muzzy); return i1 < i2 ? i1 : i2; } @@ -653,8 +653,8 @@ background_threads_disable(tsd_t *tsd) { /* Check if we need to signal the background thread early. */ void -background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, size_t npages_new) { +background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new) { background_thread_info_t *info = arena_background_thread_info_get( arena); if (malloc_mutex_trylock(tsdn, &info->mtx)) { diff --git a/src/ctl.c b/src/ctl.c index 122856c..9233c84 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3082,8 +3082,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); - MUTEX_PROF_RESET(arena->decay_dirty.mtx); - MUTEX_PROF_RESET(arena->decay_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); -- cgit v0.12 From 4d090d23f1518327ba1c5b1477d4f5a31a6cb745 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 14:52:25 -0700 Subject: Decay: Introduce a stub .c file. 
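The decay policy that this new module will eventually hold boils down to a fixed-point weighted sum over the per-epoch backlog (see arena_decay_backlog_npages_limit in the diffs above). A toy version with made-up weights; in jemalloc the real table and shift come from smoothstep.h:

#include <stdint.h>
#include <stdio.h>

#define TOY_NSTEPS 4	/* stands in for SMOOTHSTEP_NSTEPS */
#define TOY_BFP    16	/* stands in for SMOOTHSTEP_BFP */

/*
 * Illustrative fixed-point weights (not the real smoothstep table); the
 * last entry, for the most recent epoch, carries the most weight.
 */
static const uint64_t toy_h_steps[TOY_NSTEPS] = {
	1 << 12, 1 << 13, 1 << 14, 1 << 16
};

/* Same shape as arena_decay_backlog_npages_limit. */
static size_t
toy_npages_limit(const size_t backlog[TOY_NSTEPS]) {
	uint64_t sum = 0;
	for (unsigned i = 0; i < TOY_NSTEPS; i++) {
		sum += (uint64_t)backlog[i] * toy_h_steps[i];
	}
	return (size_t)(sum >> TOY_BFP);
}

int
main(void) {
	/* Oldest epoch first, most recent last. */
	size_t backlog[TOY_NSTEPS] = {100, 100, 100, 100};
	printf("limit = %zu pages\n", toy_npages_limit(backlog));
	return 0;
}

With the real smoothstep table the weights rise toward the most recent epoch, so freshly dirtied pages are mostly retained at first and fall out of the limit (and get purged) over the following epochs.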
--- Makefile.in | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/decay.c | 3 +++ 6 files changed, 12 insertions(+) create mode 100644 src/decay.c diff --git a/Makefile.in b/Makefile.in index a3c43a6..6bb56a0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -106,6 +106,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/counter.c \ $(srcroot)src/ctl.c \ + $(srcroot)src/decay.c \ $(srcroot)src/div.c \ $(srcroot)src/ecache.c \ $(srcroot)src/edata.c \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 3c17e50..156e459 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -46,6 +46,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 2f5ed62..45557f6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index d63042d..c5cfb95 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -46,6 +46,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 2f5ed62..45557f6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/src/decay.c b/src/decay.c new file mode 100644 index 0000000..454cb47 --- /dev/null +++ b/src/decay.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + -- cgit v0.12 From 8f2193dc8db26eba40f7948f7ce60c8584ab31a9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 14:44:11 -0700 Subject: Decay: Move in arena decay functions. --- include/jemalloc/internal/decay.h | 54 +++++++++ src/arena.c | 244 ++++---------------------------------- src/decay.c | 174 +++++++++++++++++++++++++++ 3 files changed, 249 insertions(+), 223 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 28fe54d..ef336f0 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -8,6 +8,15 @@ * Page allocators inform a decay object when pages enter a decay-able state * (i.e. dirty or muzzy), and query it to determine how many pages should be * purged at any given time. + * + * This is mostly a single-threaded data structure and doesn't care about + * synchronization at all; it's the caller's responsibility to manage their + * synchronization on their own. There are two exceptions: + * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query). + * 2) The mtx and purging fields live (and are initialized) here, but are + * logically owned by the page allocator. This is just a convenience (since + * those fields would be duplicated for both the dirty and muzzy states + * otherwise). 
*/ typedef struct decay_s decay_t; struct decay_s { @@ -45,6 +54,12 @@ struct decay_s { */ nstime_t deadline; /* + * The number of pages we cap ourselves at in the current epoch, per + * decay policies. Updated on an epoch change. After an epoch change, + * the caller should take steps to try to purge down to this amount. + */ + size_t npages_limit; + /* * Number of unpurged pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay_*.nunpurged and * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, @@ -56,6 +71,9 @@ struct decay_s { * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * element is the most recent epoch. Corresponding epoch times are * relative to epoch. + * + * Updated only on epoch advance, triggered by + * decay_maybe_advance_epoch, below. */ size_t backlog[SMOOTHSTEP_NSTEPS]; @@ -63,4 +81,40 @@ struct decay_s { uint64_t ceil_npages; }; +static inline ssize_t +decay_ms_read(const decay_t *decay) { + return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); +} + +static inline size_t +decay_npages_limit_get(const decay_t *decay) { + return decay->npages_limit; +} + +/* How many unused dirty pages were generated during the last epoch. */ +static inline size_t +decay_epoch_npages_delta(const decay_t *decay) { + return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; +} + +bool decay_ms_valid(ssize_t decay_ms); + +/* + * As a precondition, the decay_t must be zeroed out (as if with memset). + * + * Returns true on error. + */ +bool decay_init(decay_t *decay, ssize_t decay_ms); + +/* + * Given an already-initialized decay_t, reinitialize it with the given decay + * time. The decay_t must have previously been initialized (and should not then + * be zeroed). + */ +void decay_reinit(decay_t *decay, ssize_t decay_ms); + +/* Returns true if the epoch advanced and there are pages to purge. */ +bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, + size_t current_npages); + #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/src/arena.c b/src/arena.c index ce0b57c..055b36f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/decay.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" @@ -542,98 +543,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, arena_nactive_add(arena, udiff >> LG_PAGE); } -static ssize_t -arena_decay_ms_read(decay_t *decay) { - return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_ms_write(decay_t *decay, ssize_t decay_ms) { - atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_deadline_init(decay_t *decay) { - /* - * Generate a new deadline that is uniformly random within the next - * epoch after the current one. 
- */ - nstime_copy(&decay->deadline, &decay->epoch); - nstime_add(&decay->deadline, &decay->interval); - if (arena_decay_ms_read(decay) > 0) { - nstime_t jitter; - - nstime_init(&jitter, prng_range_u64(&decay->jitter_state, - nstime_ns(&decay->interval))); - nstime_add(&decay->deadline, &jitter); - } -} - -static bool -arena_decay_deadline_reached(const decay_t *decay, const nstime_t *time) { - return (nstime_compare(&decay->deadline, time) <= 0); -} - -static size_t -arena_decay_backlog_npages_limit(const decay_t *decay) { - uint64_t sum; - size_t npages_limit_backlog; - unsigned i; - - /* - * For each element of decay_backlog, multiply by the corresponding - * fixed-point smoothstep decay factor. Sum the products, then divide - * to round down to the nearest whole number of pages. - */ - sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * h_steps[i]; - } - npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); - - return npages_limit_backlog; -} - -static void -arena_decay_backlog_update_last(decay_t *decay, size_t current_npages) { - size_t npages_delta = (current_npages > decay->nunpurged) ? - current_npages - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; - - if (config_debug) { - if (current_npages > decay->ceil_npages) { - decay->ceil_npages = current_npages; - } - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - assert(decay->ceil_npages >= npages_limit); - if (decay->ceil_npages > npages_limit) { - decay->ceil_npages = npages_limit; - } - } -} - -static void -arena_decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, - size_t current_npages) { - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(decay->backlog, &decay->backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&decay->backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - - arena_decay_backlog_update_last(decay, current_npages); -} - static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -645,93 +554,6 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } } -static void -arena_decay_epoch_advance_helper(decay_t *decay, const nstime_t *time, - size_t current_npages) { - assert(arena_decay_deadline_reached(decay, time)); - - nstime_t delta; - nstime_copy(&delta, time); - nstime_subtract(&delta, &decay->epoch); - - uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &decay->interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&decay->epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(decay); - - /* Update the backlog. */ - arena_decay_backlog_update(decay, nadvance_u64, current_npages); -} - -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, const nstime_t *time, - bool is_background_thread) { - size_t current_npages = ecache_npages_get(ecache); - arena_decay_epoch_advance_helper(decay, time, current_npages); - - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - /* We may unlock decay->mtx when try_purge(). 
Finish logging first. */ - decay->nunpurged = (npages_limit > current_npages) ? npages_limit : - current_npages; - - if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - current_npages, npages_limit, is_background_thread); - } -} - -static void -arena_decay_reinit(decay_t *decay, ssize_t decay_ms) { - arena_decay_ms_write(decay, decay_ms); - if (decay_ms > 0) { - nstime_init(&decay->interval, (uint64_t)decay_ms * - KQU(1000000)); - nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); - } - - nstime_init_update(&decay->epoch); - decay->jitter_state = (uint64_t)(uintptr_t)decay; - arena_decay_deadline_init(decay); - decay->nunpurged = 0; - memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); -} - -static bool -arena_decay_init(decay_t *decay, ssize_t decay_ms) { - if (config_debug) { - for (size_t i = 0; i < sizeof(decay_t); i++) { - assert(((char *)decay)[i] == 0); - } - decay->ceil_npages = 0; - } - if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, - malloc_mutex_rank_exclusive)) { - return true; - } - decay->purging = false; - arena_decay_reinit(decay, decay_ms); - return false; -} - -static bool -arena_decay_ms_valid(ssize_t decay_ms) { - if (decay_ms < -1) { - return false; - } - if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * - KQU(1000)) { - return true; - } - return false; -} - static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -739,7 +561,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = arena_decay_ms_read(decay); + ssize_t decay_ms = decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, decay_stats, @@ -749,26 +571,6 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, return false; } - nstime_t time; - nstime_init_update(&time); - if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) - > 0)) { - /* - * Time went backwards. Move the epoch back in time and - * generate a new deadline, with the expectation that time - * typically flows forward for long enough periods of time that - * epochs complete. Unfortunately, this strategy is susceptible - * to clock jitter triggering premature epoch advances, but - * clock jitter estimation and compensation isn't feasible here - * because calls into this code are event-driven. - */ - nstime_copy(&decay->epoch, &time); - arena_decay_deadline_init(decay); - } else { - /* Verify that time does not go backwards. */ - assert(nstime_compare(&decay->epoch, &time) <= 0); - } - /* * If the deadline has been reached, advance to the current epoch and * purge to the new limit if necessary. Note that dirty pages created @@ -776,39 +578,35 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * epoch, so as a result purging only happens during epoch advances, or * being triggered by background threads (scheduled event). 
*/ - bool advance_epoch = arena_decay_deadline_reached(decay, &time); - if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, decay_stats, - ecache, &time, is_background_thread); - } else if (is_background_thread) { + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (is_background_thread || + (epoch_advanced && !background_thread_enabled())) { + size_t npages_limit = decay_npages_limit_get(decay); arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - ecache_npages_get(ecache), - arena_decay_backlog_npages_limit(decay), - is_background_thread); + npages_current, npages_limit, is_background_thread); } - return advance_epoch; -} - -static ssize_t -arena_decay_ms_get(decay_t *decay) { - return arena_decay_ms_read(decay); + return epoch_advanced; } ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->pa_shard.decay_dirty); + return decay_ms_read(&arena->pa_shard.decay_dirty); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->pa_shard.decay_muzzy); + return decay_ms_read(&arena->pa_shard.decay_muzzy); } static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } @@ -821,7 +619,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, decay_ms); + decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -989,7 +787,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ - npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; + npages_new = decay_epoch_npages_delta(decay); } malloc_mutex_unlock(tsdn, &decay->mtx); @@ -1922,7 +1720,7 @@ arena_dirty_decay_ms_default_get(void) { bool arena_dirty_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -1936,7 +1734,7 @@ arena_muzzy_decay_ms_default_get(void) { bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -2048,11 +1846,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->pa_shard.decay_dirty, + if (decay_init(&arena->pa_shard.decay_dirty, arena_dirty_decay_ms_default_get())) { goto label_error; } - if (arena_decay_init(&arena->pa_shard.decay_muzzy, + if (decay_init(&arena->pa_shard.decay_muzzy, arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/decay.c b/src/decay.c index 454cb47..462b9bf 100644 --- a/src/decay.c +++ b/src/decay.c @@ -1,3 +1,177 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/decay.h" + +/* + * Generate a new deadline that is uniformly random within the next epoch after + * the current one. + */ +void +decay_deadline_init(decay_t *decay) { + nstime_copy(&decay->deadline, &decay->epoch); + nstime_add(&decay->deadline, &decay->interval); + if (decay_ms_read(decay) > 0) { + nstime_t jitter; + + nstime_init(&jitter, prng_range_u64(&decay->jitter_state, + nstime_ns(&decay->interval))); + nstime_add(&decay->deadline, &jitter); + } +} + +void +decay_reinit(decay_t *decay, ssize_t decay_ms) { + atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); + if (decay_ms > 0) { + nstime_init(&decay->interval, (uint64_t)decay_ms * + KQU(1000000)); + nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); + } + + nstime_init_update(&decay->epoch); + decay->jitter_state = (uint64_t)(uintptr_t)decay; + decay_deadline_init(decay); + decay->nunpurged = 0; + memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +bool +decay_init(decay_t *decay, ssize_t decay_ms) { + if (config_debug) { + for (size_t i = 0; i < sizeof(decay_t); i++) { + assert(((char *)decay)[i] == 0); + } + decay->ceil_npages = 0; + } + if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, + malloc_mutex_rank_exclusive)) { + return true; + } + decay->purging = false; + decay_reinit(decay, decay_ms); + return false; +} + +bool +decay_ms_valid(ssize_t decay_ms) { + if (decay_ms < -1) { + return false; + } + if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * + KQU(1000)) { + return true; + } + return false; +} + +static void +decay_maybe_update_time(decay_t *decay, nstime_t *new_time) { + if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, + new_time) > 0)) { + /* + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. 
+ */ + nstime_copy(&decay->epoch, new_time); + decay_deadline_init(decay); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&decay->epoch, new_time) <= 0); + } +} + +static size_t +decay_backlog_npages_limit(const decay_t *decay) { + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. Sum the products, then divide + * to round down to the nearest whole number of pages. + */ + uint64_t sum = 0; + for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); + + return npages_limit_backlog; +} + +static void +decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, + size_t current_npages) { + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(decay->backlog, &decay->backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&decay->backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + size_t npages_delta = (current_npages > decay->nunpurged) ? + current_npages - decay->nunpurged : 0; + decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; + + if (config_debug) { + if (current_npages > decay->ceil_npages) { + decay->ceil_npages = current_npages; + } + size_t npages_limit = decay_backlog_npages_limit(decay); + assert(decay->ceil_npages >= npages_limit); + if (decay->ceil_npages > npages_limit) { + decay->ceil_npages = npages_limit; + } + } +} + +static inline bool +decay_deadline_reached(const decay_t *decay, const nstime_t *time) { + return (nstime_compare(&decay->deadline, time) <= 0); +} + +bool +decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, + size_t npages_current) { + /* Handle possible non-monotonicity of time. */ + decay_maybe_update_time(decay, new_time); + + if (!decay_deadline_reached(decay, new_time)) { + return false; + } + nstime_t delta; + nstime_copy(&delta, new_time); + nstime_subtract(&delta, &decay->epoch); + + uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); + assert(nadvance_u64 > 0); + + /* Add nadvance_u64 decay intervals to epoch. */ + nstime_copy(&delta, &decay->interval); + nstime_imultiply(&delta, nadvance_u64); + nstime_add(&decay->epoch, &delta); + + /* Set a new deadline. */ + decay_deadline_init(decay); + + /* Update the backlog. */ + decay_backlog_update(decay, nadvance_u64, npages_current); + + decay->npages_limit = decay_backlog_npages_limit(decay); + decay->nunpurged = (decay->npages_limit > npages_current) ? + decay->npages_limit : npages_current; + + return true; +} -- cgit v0.12 From cdb916ed3f76f348891d4f2a83f38bd70ed75067 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 18:37:23 -0700 Subject: Decay: Add comments for the public API. --- include/jemalloc/internal/decay.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index ef336f0..b1e80f5 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -81,11 +81,19 @@ struct decay_s { uint64_t ceil_npages; }; +/* + * The current decay time setting. This is the only public access to a decay_t + * that's allowed without holding mtx. 
+ */ static inline ssize_t decay_ms_read(const decay_t *decay) { return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); } +/* + * See the comment on the struct field -- the limit on pages we should allow in + * this decay state this epoch. + */ static inline size_t decay_npages_limit_get(const decay_t *decay) { return decay->npages_limit; @@ -97,6 +105,16 @@ decay_epoch_npages_delta(const decay_t *decay) { return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; } +/* + * Returns true if the passed in decay time setting is valid. + * < -1 : invalid + * -1 : never decay + * 0 : decay immediately + * > 0 : some positive decay time, up to a maximum allowed value of + * NSTIME_SEC_MAX * 1000, which corresponds to decaying somewhere in the early + * 27th century. By that time, we expect to have implemented alternate purging + * strategies. + */ bool decay_ms_valid(ssize_t decay_ms); /* -- cgit v0.12 From d1d7e1076b6132a1faacd10cafaebaee975edb98 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 19:16:34 -0700 Subject: Decay: move in some background_thread accesses. --- include/jemalloc/internal/decay.h | 11 +++++++++++ src/background_thread.c | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index b1e80f5..6a260fc 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -106,6 +106,17 @@ decay_epoch_npages_delta(const decay_t *decay) { } /* + * Current epoch duration, in nanoseconds. Given that new epochs are started + * somewhat haphazardly, this is not necessarily exactly the time between any + * two calls to decay_maybe_advance_epoch; see the comments on fields in the + * decay_t. + */ +static inline uint64_t +decay_epoch_duration_ns(const decay_t *decay) { + return nstime_ns(&decay->interval); +} + +/* * Returns true if the passed in decay time setting is valid. * < -1 : invalid * -1 : never decay diff --git a/src/background_thread.c b/src/background_thread.c index 95a8b16..6b68053 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -121,14 +121,14 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, } uint64_t interval; - ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + ssize_t decay_time = decay_ms_read(decay); if (decay_time <= 0) { /* Purging is eagerly done or disabled currently. */ interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; goto label_done; } - uint64_t decay_interval_ns = nstime_ns(&decay->interval); + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); assert(decay_interval_ns > 0); size_t npages = ecache_npages_get(ecache); if (npages == 0) { @@ -674,12 +674,12 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, goto label_done; } - ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + ssize_t decay_time = decay_ms_read(decay); if (decay_time <= 0) { /* Purging is eagerly done or disabled currently. */ goto label_done_unlock2; } - uint64_t decay_interval_ns = nstime_ns(&decay->interval); + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); assert(decay_interval_ns > 0); nstime_t diff; -- cgit v0.12 From bf55e58e63af719ce52a1df08758fb3a64ab2589 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 08:46:47 -0700 Subject: Rename test/unit/decay -> test/unit/arena_decay. This is really more of an end-to-end test at the arena level; it's not just of the decay code in particular any more. 
--- Makefile.in | 2 +- test/unit/arena_decay.c | 602 +++++++++++++++++++++++++++++++++++++++++++++++ test/unit/arena_decay.sh | 3 + test/unit/decay.c | 602 ----------------------------------------------- test/unit/decay.sh | 3 - 5 files changed, 606 insertions(+), 606 deletions(-) create mode 100644 test/unit/arena_decay.c create mode 100644 test/unit/arena_decay.sh delete mode 100644 test/unit/decay.c delete mode 100644 test/unit/decay.sh diff --git a/Makefile.in b/Makefile.in index 6bb56a0..823ccc7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -184,6 +184,7 @@ C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ + $(srcroot)test/unit/arena_decay.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/background_thread.c \ @@ -196,7 +197,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/counter.c \ - $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c new file mode 100644 index 0000000..7ed270f --- /dev/null +++ b/test/unit/arena_decay.c @@ -0,0 +1,602 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/ticker.h" + +static nstime_monotonic_t *nstime_monotonic_orig; +static nstime_update_t *nstime_update_orig; + +static unsigned nupdates_mock; +static nstime_t time_mock; +static bool monotonic_mock; + +static bool +check_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + expect_d_eq(ret, 0, "Unexpected mallctl error"); + return enabled; +} + +static bool +nstime_monotonic_mock(void) { + return monotonic_mock; +} + +static bool +nstime_update_mock(nstime_t *time) { + nupdates_mock++; + if (monotonic_mock) { + nstime_copy(time, &time_mock); + } + return !monotonic_mock; +} + +static unsigned +do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +void +do_epoch(void) { + uint64_t epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +void +do_purge(unsigned arena_ind) { 
+ size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +void +do_decay(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static uint64_t +get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + uint64_t npurge = 0; + size_t sz = sizeof(npurge); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); + return npurge; +} + +static uint64_t +get_arena_dirty_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); +} + +static uint64_t +get_arena_dirty_purged(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); +} + +static uint64_t +get_arena_muzzy_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static size_t +get_arena_pmuzzy(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pmuzzy; + size_t sz = sizeof(pmuzzy); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pmuzzy; +} + +static void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = do_mallocx(size, flags); + dallocx(p, flags); +} + +TEST_BEGIN(test_decay_ticks) { + test_skip_if(check_background_thread_enabled()); + + ticker_t *decay_ticker; + unsigned tick0, tick1, arena_ind; + size_t sz, large0; + void *p; + + sz = sizeof(size_t); + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + + /* Set up a manually managed arena for test. 
*/ + arena_ind = do_arena_create(0, 0); + + /* Migrate to the new arena, and get the ticker. */ + unsigned old_arena_ind; + size_t sz_arena_ind = sizeof(old_arena_ind); + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, + &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, + "Unexpected mallctl() failure"); + decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); + expect_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + + /* + * Test the standard APIs using a large size class, since we can't + * control tcache interactions for small size classes (except by + * completely disabling tcache for the entire test program). + */ + + /* malloc(). */ + tick0 = ticker_read(decay_ticker); + p = malloc(large0); + expect_ptr_not_null(p, "Unexpected malloc() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + /* free(). */ + tick0 = ticker_read(decay_ticker); + free(p); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + + /* calloc(). */ + tick0 = ticker_read(decay_ticker); + p = calloc(1, large0); + expect_ptr_not_null(p, "Unexpected calloc() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + free(p); + + /* posix_memalign(). */ + tick0 = ticker_read(decay_ticker); + expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, + "Unexpected posix_memalign() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during posix_memalign()"); + free(p); + + /* aligned_alloc(). */ + tick0 = ticker_read(decay_ticker); + p = aligned_alloc(sizeof(size_t), large0); + expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during aligned_alloc()"); + free(p); + + /* realloc(). */ + /* Allocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(NULL, large0); + expect_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Reallocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(p, large0); + expect_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Deallocate. */ + tick0 = ticker_read(decay_ticker); + realloc(p, 0); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + + /* + * Test the *allocx() APIs using large and small size classes, with + * tcache explicitly disabled. + */ + { + unsigned i; + size_t allocx_sizes[2]; + allocx_sizes[0] = large0; + allocx_sizes[1] = 1; + + for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { + sz = allocx_sizes[i]; + + /* mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during mallocx() (sz=%zu)", + sz); + /* rallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, sz, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during rallocx() (sz=%zu)", + sz); + /* xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during xallocx() (sz=%zu)", + sz); + /* dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during dallocx() (sz=%zu)", + sz); + /* sdallocx(). */ + p = mallocx(sz, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, sz, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during sdallocx() " + "(sz=%zu)", sz); + } + } + + /* + * Test tcache fill/flush interactions for large and small size classes, + * using an explicit tcache. + */ + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; + + size_t tcache_max, sz_tcache_max; + sz_tcache_max = sizeof(tcache_max); + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); + + sz = sizeof(unsigned); + expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; + + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + expect_d_eq(mallctl("tcache.flush", NULL, NULL, + (void *)&tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + + /* Will only tick if it's in tcache. */ + if (sz <= tcache_max) { + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache " + "flush (sz=%zu)", sz); + } else { + expect_u32_eq(tick1, tick0, + "Unexpected ticker tick during tcache " + "flush (sz=%zu)", sz); + } + } +} +TEST_END + +static void +decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, + uint64_t dirty_npurge0, uint64_t muzzy_npurge0, bool terminate_asap) { +#define NINTERVALS 101 + nstime_t time, update_interval, decay_ms, deadline; + + nstime_init_update(&time); + + nstime_init2(&decay_ms, dt, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_ms); + + nstime_init2(&update_interval, dt, 0); + nstime_idivide(&update_interval, NINTERVALS); + + /* + * Keep q's slab from being deallocated during the looping below. If a + * cached slab were to repeatedly come and go during looping, it could + * prevent the decay backlog ever becoming empty. 
+ */ + void *p = do_mallocx(1, flags); + uint64_t dirty_npurge1, muzzy_npurge1; + do { + for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; + i++) { + void *q = do_mallocx(1, flags); + dallocx(q, flags); + } + dirty_npurge1 = get_arena_dirty_npurge(arena_ind); + muzzy_npurge1 = get_arena_muzzy_npurge(arena_ind); + + nstime_add(&time_mock, &update_interval); + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 == + dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) || + !terminate_asap)); + dallocx(p, flags); + + if (config_stats) { + expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + + muzzy_npurge0, "Expected purging to occur"); + } +#undef NINTERVALS +} + +TEST_BEGIN(test_decay_ticker) { + test_skip_if(check_background_thread_enabled()); +#define NPS 2048 + ssize_t ddt = opt_dirty_decay_ms; + ssize_t mdt = opt_muzzy_decay_ms; + unsigned arena_ind = do_arena_create(ddt, mdt); + int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + size_t large; + + /* + * Allocate a bunch of large objects, pause the clock, deallocate every + * other object (to fragment virtual memory), restore the clock, then + * [md]allocx() in a tight loop while advancing time rapidly to verify + * the ticker triggers purging. + */ + + size_t tcache_max; + size_t sz = sizeof(size_t); + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + + do_purge(arena_ind); + uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); + uint64_t muzzy_npurge0 = get_arena_muzzy_npurge(arena_ind); + + for (unsigned i = 0; i < NPS; i++) { + ps[i] = do_mallocx(large, flags); + } + + nupdates_mock = 0; + nstime_init_update(&time_mock); + monotonic_mock = true; + + nstime_monotonic_orig = nstime_monotonic; + nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; + nstime_update = nstime_update_mock; + + for (unsigned i = 0; i < NPS; i += 2) { + dallocx(ps[i], flags); + unsigned nupdates0 = nupdates_mock; + do_decay(arena_ind); + expect_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0, + muzzy_npurge0, true); + decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0, + muzzy_npurge0, false); + + do_arena_destroy(arena_ind); + + nstime_monotonic = nstime_monotonic_orig; + nstime_update = nstime_update_orig; +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_nonmonotonic) { + test_skip_if(check_background_thread_enabled()); +#define NPS (SMOOTHSTEP_NSTEPS + 1) + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; + size_t sz, large0; + unsigned i, nupdates0; + + sz = sizeof(size_t); + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + do_epoch(); + sz = sizeof(uint64_t); + npurge0 = get_arena_npurge(0); + + nupdates_mock = 0; + nstime_init_update(&time_mock); + monotonic_mock = false; + + nstime_monotonic_orig = nstime_monotonic; + nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; + nstime_update = nstime_update_mock; + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large0, flags); + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NPS; i++) { 
+ dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + expect_u_gt(nupdates_mock, nupdates0, + "Expected nstime_update() to be called"); + } + + do_epoch(); + sz = sizeof(uint64_t); + npurge1 = get_arena_npurge(0); + + if (config_stats) { + expect_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + } + + nstime_monotonic = nstime_monotonic_orig; + nstime_update = nstime_update_orig; +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_now) { + test_skip_if(check_background_thread_enabled()); + + unsigned arena_ind = do_arena_create(0, 0); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + /* Verify that dirty/muzzy pages never linger after deallocation. */ + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + size_t size = sizes[i]; + generate_dirty(arena_ind, size); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, + "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, + "Unexpected muzzy pages"); + } + do_arena_destroy(arena_ind); +} +TEST_END + +TEST_BEGIN(test_decay_never) { + test_skip_if(check_background_thread_enabled() || !config_stats); + + unsigned arena_ind = do_arena_create(-1, -1); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + void *ptrs[sizeof(sizes)/sizeof(size_t)]; + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + ptrs[i] = do_mallocx(sizes[i], flags); + } + /* Verify that each deallocation generates additional dirty pages. 
*/ + size_t pdirty_prev = get_arena_pdirty(arena_ind); + size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); + expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + dallocx(ptrs[i], flags); + size_t pdirty = get_arena_pdirty(arena_ind); + size_t pmuzzy = get_arena_pmuzzy(arena_ind); + expect_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), + pdirty_prev, "Expected dirty pages to increase."); + expect_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); + pdirty_prev = pdirty; + } + do_arena_destroy(arena_ind); +} +TEST_END + +int +main(void) { + return test( + test_decay_ticks, + test_decay_ticker, + test_decay_nonmonotonic, + test_decay_now, + test_decay_never); +} diff --git a/test/unit/arena_decay.sh b/test/unit/arena_decay.sh new file mode 100644 index 0000000..45aeccf --- /dev/null +++ b/test/unit/arena_decay.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0" diff --git a/test/unit/decay.c b/test/unit/decay.c deleted file mode 100644 index 7ed270f..0000000 --- a/test/unit/decay.c +++ /dev/null @@ -1,602 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/ticker.h" - -static nstime_monotonic_t *nstime_monotonic_orig; -static nstime_update_t *nstime_update_orig; - -static unsigned nupdates_mock; -static nstime_t time_mock; -static bool monotonic_mock; - -static bool -check_background_thread_enabled(void) { - bool enabled; - size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); - if (ret == ENOENT) { - return false; - } - expect_d_eq(ret, 0, "Unexpected mallctl error"); - return enabled; -} - -static bool -nstime_monotonic_mock(void) { - return monotonic_mock; -} - -static bool -nstime_update_mock(nstime_t *time) { - nupdates_mock++; - if (monotonic_mock) { - nstime_copy(time, &time_mock); - } - return !monotonic_mock; -} - -static unsigned -do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - return arena_ind; -} - -static void -do_arena_destroy(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_epoch(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); -} - -void -do_purge(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = 
sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_decay(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static uint64_t -get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - uint64_t npurge = 0; - size_t sz = sizeof(npurge); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); - return npurge; -} - -static uint64_t -get_arena_dirty_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); -} - -static uint64_t -get_arena_dirty_purged(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); -} - -static uint64_t -get_arena_muzzy_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static uint64_t -get_arena_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static size_t -get_arena_pdirty(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pdirty; - size_t sz = sizeof(pdirty); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pdirty; -} - -static size_t -get_arena_pmuzzy(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pmuzzy; - size_t sz = sizeof(pmuzzy); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pmuzzy; -} - -static void * -do_mallocx(size_t size, int flags) { - void *p = mallocx(size, flags); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - return p; -} - -static void -generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - void *p = do_mallocx(size, flags); - dallocx(p, flags); -} - -TEST_BEGIN(test_decay_ticks) { - test_skip_if(check_background_thread_enabled()); - - ticker_t *decay_ticker; - unsigned tick0, tick1, arena_ind; - size_t sz, large0; - void *p; - - sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - - /* Set up a manually managed arena for test. */ - arena_ind = do_arena_create(0, 0); - - /* Migrate to the new arena, and get the ticker. 
*/ - unsigned old_arena_ind; - size_t sz_arena_ind = sizeof(old_arena_ind); - expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, - &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); - decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); - expect_ptr_not_null(decay_ticker, - "Unexpected failure getting decay ticker"); - - /* - * Test the standard APIs using a large size class, since we can't - * control tcache interactions for small size classes (except by - * completely disabling tcache for the entire test program). - */ - - /* malloc(). */ - tick0 = ticker_read(decay_ticker); - p = malloc(large0); - expect_ptr_not_null(p, "Unexpected malloc() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); - /* free(). */ - tick0 = ticker_read(decay_ticker); - free(p); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); - - /* calloc(). */ - tick0 = ticker_read(decay_ticker); - p = calloc(1, large0); - expect_ptr_not_null(p, "Unexpected calloc() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); - free(p); - - /* posix_memalign(). */ - tick0 = ticker_read(decay_ticker); - expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, - "Unexpected posix_memalign() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during posix_memalign()"); - free(p); - - /* aligned_alloc(). */ - tick0 = ticker_read(decay_ticker); - p = aligned_alloc(sizeof(size_t), large0); - expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during aligned_alloc()"); - free(p); - - /* realloc(). */ - /* Allocate. */ - tick0 = ticker_read(decay_ticker); - p = realloc(NULL, large0); - expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Reallocate. */ - tick0 = ticker_read(decay_ticker); - p = realloc(p, large0); - expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Deallocate. */ - tick0 = ticker_read(decay_ticker); - realloc(p, 0); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - - /* - * Test the *allocx() APIs using large and small size classes, with - * tcache explicitly disabled. - */ - { - unsigned i; - size_t allocx_sizes[2]; - allocx_sizes[0] = large0; - allocx_sizes[1] = 1; - - for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { - sz = allocx_sizes[i]; - - /* mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(sz, MALLOCX_TCACHE_NONE); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during mallocx() (sz=%zu)", - sz); - /* rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, sz, MALLOCX_TCACHE_NONE); - expect_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during rallocx() (sz=%zu)", - sz); - /* xallocx(). 
*/ - tick0 = ticker_read(decay_ticker); - xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during xallocx() (sz=%zu)", - sz); - /* dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during dallocx() (sz=%zu)", - sz); - /* sdallocx(). */ - p = mallocx(sz, MALLOCX_TCACHE_NONE); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, sz, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during sdallocx() " - "(sz=%zu)", sz); - } - } - - /* - * Test tcache fill/flush interactions for large and small size classes, - * using an explicit tcache. - */ - unsigned tcache_ind, i; - size_t tcache_sizes[2]; - tcache_sizes[0] = large0; - tcache_sizes[1] = 1; - - size_t tcache_max, sz_tcache_max; - sz_tcache_max = sizeof(tcache_max); - expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); - - sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - - for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { - sz = tcache_sizes[i]; - - /* tcache fill. */ - tick0 = ticker_read(decay_ticker); - p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache fill " - "(sz=%zu)", sz); - /* tcache flush. */ - dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); - expect_d_eq(mallctl("tcache.flush", NULL, NULL, - (void *)&tcache_ind, sizeof(unsigned)), 0, - "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); - - /* Will only tick if it's in tcache. */ - if (sz <= tcache_max) { - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache " - "flush (sz=%zu)", sz); - } else { - expect_u32_eq(tick1, tick0, - "Unexpected ticker tick during tcache " - "flush (sz=%zu)", sz); - } - } -} -TEST_END - -static void -decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, - uint64_t dirty_npurge0, uint64_t muzzy_npurge0, bool terminate_asap) { -#define NINTERVALS 101 - nstime_t time, update_interval, decay_ms, deadline; - - nstime_init_update(&time); - - nstime_init2(&decay_ms, dt, 0); - nstime_copy(&deadline, &time); - nstime_add(&deadline, &decay_ms); - - nstime_init2(&update_interval, dt, 0); - nstime_idivide(&update_interval, NINTERVALS); - - /* - * Keep q's slab from being deallocated during the looping below. If a - * cached slab were to repeatedly come and go during looping, it could - * prevent the decay backlog ever becoming empty. 
- */ - void *p = do_mallocx(1, flags); - uint64_t dirty_npurge1, muzzy_npurge1; - do { - for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; - i++) { - void *q = do_mallocx(1, flags); - dallocx(q, flags); - } - dirty_npurge1 = get_arena_dirty_npurge(arena_ind); - muzzy_npurge1 = get_arena_muzzy_npurge(arena_ind); - - nstime_add(&time_mock, &update_interval); - nstime_update(&time); - } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 == - dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) || - !terminate_asap)); - dallocx(p, flags); - - if (config_stats) { - expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + - muzzy_npurge0, "Expected purging to occur"); - } -#undef NINTERVALS -} - -TEST_BEGIN(test_decay_ticker) { - test_skip_if(check_background_thread_enabled()); -#define NPS 2048 - ssize_t ddt = opt_dirty_decay_ms; - ssize_t mdt = opt_muzzy_decay_ms; - unsigned arena_ind = do_arena_create(ddt, mdt); - int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; - size_t large; - - /* - * Allocate a bunch of large objects, pause the clock, deallocate every - * other object (to fragment virtual memory), restore the clock, then - * [md]allocx() in a tight loop while advancing time rapidly to verify - * the ticker triggers purging. - */ - - size_t tcache_max; - size_t sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); - - do_purge(arena_ind); - uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); - uint64_t muzzy_npurge0 = get_arena_muzzy_npurge(arena_ind); - - for (unsigned i = 0; i < NPS; i++) { - ps[i] = do_mallocx(large, flags); - } - - nupdates_mock = 0; - nstime_init_update(&time_mock); - monotonic_mock = true; - - nstime_monotonic_orig = nstime_monotonic; - nstime_update_orig = nstime_update; - nstime_monotonic = nstime_monotonic_mock; - nstime_update = nstime_update_mock; - - for (unsigned i = 0; i < NPS; i += 2) { - dallocx(ps[i], flags); - unsigned nupdates0 = nupdates_mock; - do_decay(arena_ind); - expect_u_gt(nupdates_mock, nupdates0, - "Expected nstime_update() to be called"); - } - - decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0, - muzzy_npurge0, true); - decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0, - muzzy_npurge0, false); - - do_arena_destroy(arena_ind); - - nstime_monotonic = nstime_monotonic_orig; - nstime_update = nstime_update_orig; -#undef NPS -} -TEST_END - -TEST_BEGIN(test_decay_nonmonotonic) { - test_skip_if(check_background_thread_enabled()); -#define NPS (SMOOTHSTEP_NSTEPS + 1) - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; - uint64_t npurge0 = 0; - uint64_t npurge1 = 0; - size_t sz, large0; - unsigned i, nupdates0; - - sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - - expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl failure"); - do_epoch(); - sz = sizeof(uint64_t); - npurge0 = get_arena_npurge(0); - - nupdates_mock = 0; - nstime_init_update(&time_mock); - monotonic_mock = false; - - nstime_monotonic_orig = nstime_monotonic; - nstime_update_orig = nstime_update; - nstime_monotonic = nstime_monotonic_mock; - nstime_update = nstime_update_mock; - - for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large0, flags); - expect_ptr_not_null(ps[i], "Unexpected mallocx() failure"); - } - - for (i = 0; i < NPS; i++) { 
- dallocx(ps[i], flags); - nupdates0 = nupdates_mock; - expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.decay failure"); - expect_u_gt(nupdates_mock, nupdates0, - "Expected nstime_update() to be called"); - } - - do_epoch(); - sz = sizeof(uint64_t); - npurge1 = get_arena_npurge(0); - - if (config_stats) { - expect_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); - } - - nstime_monotonic = nstime_monotonic_orig; - nstime_update = nstime_update_orig; -#undef NPS -} -TEST_END - -TEST_BEGIN(test_decay_now) { - test_skip_if(check_background_thread_enabled()); - - unsigned arena_ind = do_arena_create(0, 0); - expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; - /* Verify that dirty/muzzy pages never linger after deallocation. */ - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { - size_t size = sizes[i]; - generate_dirty(arena_ind, size); - expect_zu_eq(get_arena_pdirty(arena_ind), 0, - "Unexpected dirty pages"); - expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, - "Unexpected muzzy pages"); - } - do_arena_destroy(arena_ind); -} -TEST_END - -TEST_BEGIN(test_decay_never) { - test_skip_if(check_background_thread_enabled() || !config_stats); - - unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; - void *ptrs[sizeof(sizes)/sizeof(size_t)]; - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { - ptrs[i] = do_mallocx(sizes[i], flags); - } - /* Verify that each deallocation generates additional dirty pages. */ - size_t pdirty_prev = get_arena_pdirty(arena_ind); - size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); - expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); - expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { - dallocx(ptrs[i], flags); - size_t pdirty = get_arena_pdirty(arena_ind); - size_t pmuzzy = get_arena_pmuzzy(arena_ind); - expect_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), - pdirty_prev, "Expected dirty pages to increase."); - expect_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); - pdirty_prev = pdirty; - } - do_arena_destroy(arena_ind); -} -TEST_END - -int -main(void) { - return test( - test_decay_ticks, - test_decay_ticker, - test_decay_nonmonotonic, - test_decay_now, - test_decay_never); -} diff --git a/test/unit/decay.sh b/test/unit/decay.sh deleted file mode 100644 index 45aeccf..0000000 --- a/test/unit/decay.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0" -- cgit v0.12 From f77cec311e102a46a58402570b43aa74dc5d7ae7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 08:52:58 -0700 Subject: Decay: Take current time as an argument. This better facilitates testing. 
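
For instance, with this change a unit test can drive decay from a synthetic
clock rather than the real one. A minimal sketch (not part of this patch; it
only uses decay_init(), decay_epoch_duration_ns() and
decay_maybe_advance_epoch() as introduced in this series, plus the usual test
assertion macros):

    /* Hypothetical test snippet; not included in this commit. */
    decay_t decay;
    memset(&decay, 0, sizeof(decay));
    nstime_t fake_now;
    nstime_init(&fake_now, 0);            /* Mock clock starts at t = 0. */
    assert_false(decay_init(&decay, &fake_now, 10 * 1000 /* 10s */),
        "Unexpected decay_init() failure");
    /*
     * Jump the mock clock ahead by two epoch durations; the deadline is one
     * duration plus at most one duration of jitter, so it must be reached.
     */
    nstime_init(&fake_now, 2 * decay_epoch_duration_ns(&decay));
    assert_true(decay_maybe_advance_epoch(&decay, &fake_now, 0),
        "Expected the epoch to advance");
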
--- include/jemalloc/internal/decay.h | 4 ++-- src/arena.c | 11 ++++++++--- src/decay.c | 8 ++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 6a260fc..df39665 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -133,14 +133,14 @@ bool decay_ms_valid(ssize_t decay_ms); * * Returns true on error. */ -bool decay_init(decay_t *decay, ssize_t decay_ms); +bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* * Given an already-initialized decay_t, reinitialize it with the given decay * time. The decay_t must have previously been initialized (and should not then * be zeroed). */ -void decay_reinit(decay_t *decay, ssize_t decay_ms); +void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* Returns true if the epoch advanced and there are pages to purge. */ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, diff --git a/src/arena.c b/src/arena.c index 055b36f..16be6b1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -619,7 +619,9 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - decay_reinit(decay, decay_ms); + nstime_t cur_time; + nstime_init_update(&cur_time); + decay_reinit(decay, &cur_time, decay_ms); arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -1846,11 +1848,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (decay_init(&arena->pa_shard.decay_dirty, + nstime_t cur_time; + nstime_init_update(&cur_time); + + if (decay_init(&arena->pa_shard.decay_dirty, &cur_time, arena_dirty_decay_ms_default_get())) { goto label_error; } - if (decay_init(&arena->pa_shard.decay_muzzy, + if (decay_init(&arena->pa_shard.decay_muzzy, &cur_time, arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/decay.c b/src/decay.c index 462b9bf..23d59da 100644 --- a/src/decay.c +++ b/src/decay.c @@ -21,7 +21,7 @@ decay_deadline_init(decay_t *decay) { } void -decay_reinit(decay_t *decay, ssize_t decay_ms) { +decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -29,7 +29,7 @@ decay_reinit(decay_t *decay, ssize_t decay_ms) { nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } - nstime_init_update(&decay->epoch); + nstime_copy(&decay->epoch, cur_time); decay->jitter_state = (uint64_t)(uintptr_t)decay; decay_deadline_init(decay); decay->nunpurged = 0; @@ -37,7 +37,7 @@ decay_reinit(decay_t *decay, ssize_t decay_ms) { } bool -decay_init(decay_t *decay, ssize_t decay_ms) { +decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { if (config_debug) { for (size_t i = 0; i < sizeof(decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -49,7 +49,7 @@ decay_init(decay_t *decay, ssize_t decay_ms) { return true; } decay->purging = false; - decay_reinit(decay, decay_ms); + decay_reinit(decay, cur_time, decay_ms); return false; } -- cgit v0.12 From 48a2cd6d7932b2a38baab2d5394db3141d41b12e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 10:19:38 -0700 Subject: Decay: Add a (mostly stub) test case. 
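
It is indeed mostly a stub; one cheap follow-up (a sketch only, not included
here, with assert_zd_eq assumed from the usual test macros) would be to also
verify the setting round-trip inside test_decay_empty:

    /* Hypothetical extra assertion: the decay time handed to decay_init()
     * should read back via decay_ms_read(). */
    assert_zd_eq(decay_ms_read(&decay), (ssize_t)decay_ms,
        "Unexpected decay time setting");
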
--- Makefile.in | 1 + test/unit/decay.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 test/unit/decay.c diff --git a/Makefile.in b/Makefile.in index 823ccc7..b19c14f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -197,6 +197,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/counter.c \ + $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ diff --git a/test/unit/decay.c b/test/unit/decay.c new file mode 100644 index 0000000..9da0d94 --- /dev/null +++ b/test/unit/decay.c @@ -0,0 +1,45 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/decay.h" + +/* + * Honestly, this is mostly a stub for now. Eventually, we should beef up + * testing here. + */ + +TEST_BEGIN(test_decay_empty) { + /* If we never have any decaying pages, npages_limit should be 0. */ + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + uint64_t time_between_calls = decay_epoch_duration_ns(&decay) / 5; + int nepochs = 0; + for (uint64_t i = 0; i < decay_ns / time_between_calls * 10; i++) { + size_t dirty_pages = 0; + nstime_init(&curtime, i * time_between_calls); + bool epoch_advanced = decay_maybe_advance_epoch(&decay, + &curtime, dirty_pages); + if (epoch_advanced) { + nepochs++; + assert_zu_eq(decay_npages_limit_get(&decay), 0, + "Should not increase the limit arbitrarily"); + } + } + assert_d_gt(nepochs, 0, "Should have advanced epochs"); +} +TEST_END + +int +main(void) { + return test( + test_decay_empty); +} -- cgit v0.12 From e77f47a85a5e48894065852cbafef3d78724acef Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 11:04:02 -0700 Subject: Move arena decay getters to PA. 
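
The arena-level getters stay around, but only as thin wrappers over the new
pa_shard accessors, so the following invariant holds (illustration only, not
code in this patch):

    /* arena_* and pa_shard_* decay getters must agree after this change. */
    assert(arena_dirty_decay_ms_get(arena) ==
        pa_shard_dirty_decay_ms_get(&arena->pa_shard));
    assert(arena_muzzy_decay_ms_get(arena) ==
        pa_shard_muzzy_decay_ms_get(&arena->pa_shard));
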
--- include/jemalloc/internal/arena_inlines_b.h | 6 ------ include/jemalloc/internal/pa.h | 15 +++++++++++++++ src/arena.c | 4 ++-- src/extent.c | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8b77a33..565e226 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -108,12 +108,6 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { large_prof_info_set(edata, tctx); } -JEMALLOC_ALWAYS_INLINE bool -arena_may_force_decay(arena_t *arena) { - return !(arena_dirty_decay_ms_get(arena) == -1 - || arena_muzzy_decay_ms_get(arena) == -1); -} - JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d686652..655e46b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -90,6 +90,21 @@ pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); } +static inline ssize_t +pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { + return decay_ms_read(&shard->decay_dirty); +} +static inline ssize_t +pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { + return decay_ms_read(&shard->decay_muzzy); +} + +static inline bool +pa_shard_may_force_decay(pa_shard_t *shard) { + return !(pa_shard_dirty_decay_ms_get(shard) == -1 + || pa_shard_muzzy_decay_ms_get(shard) == -1); +} + /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); diff --git a/src/arena.c b/src/arena.c index 16be6b1..f1ee25a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -595,12 +595,12 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return decay_ms_read(&arena->pa_shard.decay_dirty); + return pa_shard_dirty_decay_ms_get(&arena->pa_shard); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return decay_ms_read(&arena->pa_shard.decay_muzzy); + return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); } static bool diff --git a/src/extent.c b/src/extent.c index 0162494..3d8af3d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1013,7 +1013,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - arena_may_force_decay(arena)) { + pa_shard_may_force_decay(&arena->pa_shard)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); extent_maximally_purge(tsdn, arena, ehooks, edata); -- cgit v0.12 From eba35e2e486ab81f44126d86bbb6555a02072fe2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 10:37:46 -0700 Subject: Remove extent knowledge of arena. 
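The ecache_*/extent_* entry points now take a pa_shard_t * instead of an arena_t *, and the purge wrappers drop the arena argument entirely since they only touch ehooks. A typical call site changes roughly like this (distilled from the arena.c hunks below):

    /* Before: extent routines were handed the whole arena. */
    edata = ecache_alloc(tsdn, arena, ehooks,
        &arena->pa_shard.ecache_dirty, NULL, esize, alignment,
        /* slab */ false, szind, zero);

    /* After: the caller passes the shard; no arena_t in sight. */
    edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks,
        &arena->pa_shard.ecache_dirty, NULL, esize, alignment,
        /* slab */ false, szind, zero);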
--- include/jemalloc/internal/extent.h | 24 ++-- src/arena.c | 31 ++--- src/extent.c | 264 ++++++++++++++++++------------------- src/extent_dss.c | 6 +- src/large.c | 8 +- 5 files changed, 165 insertions(+), 168 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index bb01254..8b2db18 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,34 +19,34 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero); -edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero); -void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/arena.c b/src/arena.c index f1ee25a..7934a6b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,8 +282,8 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, arena, ehooks, &arena->pa_shard.ecache_dirty, - edata); + ecache_dalloc(tsdn, &arena->pa_shard, ehooks, + &arena->pa_shard.ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) 
{ arena_decay_dirty(tsdn, arena, false, true); } else { @@ -459,16 +459,16 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, arena, ehooks, + edata_t *edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, zero); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, arena, ehooks, + edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, false, szind, zero); } if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, arena, ehooks, + edata = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, NULL, esize, alignment, false, szind, zero); if (config_stats) { @@ -655,7 +655,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = ecache_evict(tsdn, arena, ehooks, ecache, npages_limit)) + (edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += edata_size_get(edata) >> LG_PAGE; @@ -690,9 +690,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, not_reached(); case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && - !extent_purge_lazy_wrapper(tsdn, arena, - ehooks, edata, 0, edata_size_get(edata))) { - ecache_dalloc(tsdn, arena, ehooks, + !extent_purge_lazy_wrapper(tsdn, ehooks, edata, 0, + edata_size_get(edata))) { + ecache_dalloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -700,7 +700,8 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, + edata); if (config_stats) { nunmapped += npages; } @@ -988,9 +989,9 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; - while ((edata = ecache_evict(tsdn, arena, ehooks, + while ((edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, ehooks, edata); + extent_destroy_wrapper(tsdn, &arena->pa_shard, ehooks, edata); } } @@ -1040,7 +1041,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); zero = false; - slab = ecache_alloc_grow(tsdn, arena, ehooks, + slab = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, true, szind, &zero); @@ -1061,11 +1062,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - edata_t *slab = ecache_alloc(tsdn, arena, ehooks, + edata_t *slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, arena, ehooks, + slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, PAGE, true, binind, &zero); } diff --git a/src/extent.c 
b/src/extent.c index 3d8af3d..595916a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,12 +15,10 @@ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); -static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); +static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length, bool growing_retained); +static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, @@ -39,15 +37,16 @@ static atomic_zu_t highpages; */ static void extent_deregister(tsdn_t *tsdn, edata_t *edata); -static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained); +static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, + ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, + bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); -static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); @@ -70,23 +69,23 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, } edata_t * -ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero) { +ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, alignment, slab, szind, zero, &commit, false); + edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, + new_addr, size, alignment, slab, szind, zero, &commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } edata_t * -ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero) { assert(size != 0); @@ -95,7 +94,7 @@ ecache_alloc_grow(tsdn_t 
*tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, size, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { @@ -107,7 +106,7 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, arena, ehooks, new_addr, + edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, size, alignment, slab, szind, zero, &commit); } @@ -116,8 +115,8 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } void -ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata) { +ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(edata_dumpable_get(edata)); @@ -127,12 +126,12 @@ ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, ecache, edata, false); + extent_record(tsdn, shard, ehooks, ecache, edata, false); } edata_t * -ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - size_t npages_min) { +ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -157,8 +156,8 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, - &arena->pa_shard.edata_cache, ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, &shard->edata_cache, + ehooks, ecache, edata)) { break; } /* @@ -195,11 +194,11 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&arena->pa_shard.stats->abandoned_vm, sz, + atomic_fetch_add_zu(&shard->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -207,13 +206,13 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * that this is only a virtual memory leak. */ if (ecache->state == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, + if (extent_purge_lazy_impl(tsdn, ehooks, edata, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, + extent_purge_forced_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } static void @@ -356,7 +355,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { * given allocation request. 
*/ static edata_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -440,7 +439,7 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -463,9 +462,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, - ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, - szind, slab, growing_retained); + *edata = extent_split_impl(tsdn, &shard->edata_cache, ehooks, + *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, + slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -475,9 +474,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, - ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, - false, growing_retained); + *trail = extent_split_impl(tsdn, &shard->edata_cache, ehooks, + *edata, size, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -501,7 +500,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * and put back into ecache. */ static edata_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -510,7 +509,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -544,7 +543,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (to_leak != NULL) { void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, + extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); assert(emap_lock_edata_from_addr(tsdn, &emap_global, leak, false) == NULL); @@ -559,21 +558,21 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * in the given ecache_t. 
*/ static edata_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit, bool growing_retained) { +extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); assert(!*zero || !slab); - edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, + edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; @@ -582,7 +581,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, arena, ehooks, ecache, edata, + extent_record(tsdn, shard, ehooks, ecache, edata, growing_retained); return NULL; } @@ -614,13 +613,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint - * virtual memory ranges retained by each arena. + * virtual memory ranges retained by each shard. */ static edata_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); assert(!*zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -633,20 +632,19 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz( - arena->pa_shard.ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(shard->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->pa_shard.ecache_grow.next + egn_skip >= + if (shard->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. 
*/ goto label_err; } alloc_size = sz_pind2sz( - arena->pa_shard.ecache_grow.next + egn_skip); + shard->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); + edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); if (edata == NULL) { goto label_err; } @@ -657,17 +655,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } - edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, - pa_shard_extent_sn_next(&arena->pa_shard), extent_state_active, - zeroed, committed, true, EXTENT_IS_HEAD); + edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, + alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), + extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } @@ -684,17 +681,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, lead, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, trail, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, trail, true); } } else { /* @@ -706,13 +703,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, to_salvage, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, to_leak, true); + extents_abandon_vm(tsdn, shard, ehooks, + &shard->ecache_retained, to_leak, true); } goto label_err; } @@ -720,8 +717,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, edata, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -739,15 +736,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. 
*/ - if (arena->pa_shard.ecache_grow.next + egn_skip + 1 <= - arena->pa_shard.ecache_grow.limit) { - arena->pa_shard.ecache_grow.next += egn_skip + 1; + if (shard->ecache_grow.next + egn_skip + 1 <= + shard->ecache_grow.limit) { + shard->ecache_grow.next += egn_skip + 1; } else { - arena->pa_shard.ecache_grow.next - = arena->pa_shard.ecache_grow.limit; + shard->ecache_grow.next = shard->ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -765,47 +761,47 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); return NULL; } static edata_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); - edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, + edata_t *edata = extent_recycle(tsdn, shard, ehooks, + &shard->ecache_retained, new_addr, size, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, arena, ehooks, size, + edata = extent_grow_retained(tsdn, shard, ehooks, size, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->ecache_grow.mtx); return edata; } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); + edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); if (edata == NULL) { return NULL; } @@ -813,14 +809,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, - size, slab, szind, pa_shard_extent_sn_next(&arena->pa_shard), + edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, + size, slab, szind, pa_shard_extent_sn_next(shard), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } @@ -956,24 +952,24 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Purge a single extent to retained / unmapped directly. */ static void -extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, +extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.purged, + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); locked_dec_zu(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); } } @@ -983,8 +979,8 @@ extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, * given ecache_t (coalesces, deregisters slab interiors, the heap operations). 
*/ static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool growing_retained) { +extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1000,23 +996,23 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &arena->pa_shard.edata_cache, - ehooks, ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &shard->edata_cache, ehooks, + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &arena->pa_shard.ecache_dirty); + assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->pa_shard.edata_cache, ehooks, ecache, edata, + &shard->edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - pa_shard_may_force_decay(&arena->pa_shard)) { + pa_shard_may_force_decay(shard)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); - extent_maximally_purge(tsdn, arena, ehooks, edata); + extent_maximally_purge(tsdn, shard, ehooks, edata); return; } } @@ -1026,20 +1022,20 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return; } - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { bool err; @@ -1055,14 +1051,14 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } return err; } void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1075,7 +1071,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * threads, and reregister if deallocation fails. 
*/ extent_deregister(tsdn, edata); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { + if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { return; } extent_reregister(tsdn, edata); @@ -1104,12 +1100,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, - edata, false); + extent_record(tsdn, shard, ehooks, &shard->ecache_retained, edata, + false); } void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -1125,7 +1121,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } static bool @@ -1158,8 +1154,8 @@ extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), @@ -1168,15 +1164,15 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, +extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, ehooks, edata, offset, length, false); } static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), @@ -1185,10 +1181,10 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, edata, - offset, length, false); +extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, ehooks, edata, offset, length, + false); } /* diff --git a/src/extent_dss.c b/src/extent_dss.c index 55f037e..de6852e 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -188,8 +188,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { ehooks_t *ehooks = arena_get_ehooks( arena); - extent_dalloc_gap(tsdn, arena, ehooks, - gap); + extent_dalloc_gap(tsdn, + &arena->pa_shard, ehooks, gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); @@ -208,7 +208,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, ehooks, &edata, 0, size)) { + ehooks, &edata, 0, size)) { memset(ret, 0, size); } } diff --git a/src/large.c b/src/large.c index 57bf674..26a1740 100644 --- a/src/large.c +++ b/src/large.c @@ -120,17 +120,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; edata_t *trail; bool new_mapping; - if ((trail = ecache_alloc(tsdn, arena, ehooks, + if ((trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, arena, ehooks, + || (trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, + if ((trail = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { @@ -143,7 +143,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, edata, trail)) { - extent_dalloc_wrapper(tsdn, arena, ehooks, trail); + extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, trail); return true; } -- cgit v0.12 From 7624043a41087bb5124e8dadb184f53dd8583def Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 12:04:16 -0700 Subject: PA: Add ehook-getting support. 
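The shard now records the base_t it was created from, so code that only sees a pa_shard_t can still recover the extent hooks via a one-line accessor over base_ehooks_get(). The follow-up commits use it like this (sketch):

    /* Somewhere that only has the shard, not the arena: */
    ehooks_t *ehooks = pa_shard_ehooks_get(shard);
    edata_t *edata = ecache_alloc(tsdn, shard, ehooks,
        &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero);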
--- include/jemalloc/internal/pa.h | 9 +++++++++ src/pa.c | 2 ++ 2 files changed, 11 insertions(+) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 655e46b..827c0b5 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/base.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" @@ -80,6 +81,9 @@ struct pa_shard_s { */ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + + /* The base from which we get the ehooks and allocate metadat. */ + base_t *base; }; static inline void @@ -105,6 +109,11 @@ pa_shard_may_force_decay(pa_shard_t *shard) { || pa_shard_muzzy_decay_ms_get(shard) == -1); } +static inline ehooks_t * +pa_shard_ehooks_get(pa_shard_t *shard) { + return base_ehooks_get(shard->base); +} + /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); diff --git a/src/pa.c b/src/pa.c index e4dbb04..a4ec4bd 100644 --- a/src/pa.c +++ b/src/pa.c @@ -48,6 +48,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->base = base; + return false; } -- cgit v0.12 From 9f93625c1438a4dadc60bda9e43c63bcadd21ebd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 12:29:12 -0700 Subject: PA: Move in arena large allocation functionality. --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 31 +++---------------------------- src/pa.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 827c0b5..e1821e6 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,4 +119,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 7934a6b..1e3ae6e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -451,37 +451,12 @@ arena_may_have_muzzy(arena_t *arena) { edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - ehooks_t *ehooks = arena_get_ehooks(arena); - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, - zero); - if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, - false, szind, zero); - } - if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, NULL, esize, alignment, - false, szind, zero); - if (config_stats) { - /* - * edata may be NULL on OOM, but in that case mapped_add - * isn't used below, so there's no need to conditionlly 
- * set it to 0 here. - */ - mapped_add = esize; - } - } else if (config_stats) { - mapped_add = 0; - } + + edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, + /* slab */ false, szind, zero, &mapped_add); if (edata != NULL) { if (config_stats) { diff --git a/src/pa.c b/src/pa.c index a4ec4bd..072d485 100644 --- a/src/pa.c +++ b/src/pa.c @@ -57,3 +57,42 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard) { return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); } + +static bool +pa_shard_may_have_muzzy(pa_shard_t *shard) { + return pa_shard_muzzy_decay_ms_get(shard) != 0; +} + +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool *zero, size_t *mapped_add) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + edata_t *edata = ecache_alloc(tsdn, shard, ehooks, + &shard->ecache_dirty, NULL, size, alignment, slab, szind, + zero); + if (edata == NULL && pa_shard_may_have_muzzy(shard)) { + edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, + NULL, size, alignment, slab, szind, zero); + } + + if (edata == NULL) { + edata = ecache_alloc_grow(tsdn, shard, ehooks, + &shard->ecache_retained, NULL, size, alignment, slab, + szind, zero); + if (config_stats) { + /* + * edata may be NULL on OOM, but in that case mapped_add + * isn't used below, so there's no need to conditionlly + * set it to 0 here. + */ + *mapped_add = size; + } + } else if (config_stats) { + *mapped_add = 0; + } + return edata; +} -- cgit v0.12 From 7be3dea82c8489e7e892c72b5f8d0a2901ff4695 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 13:58:57 -0700 Subject: PA: Have slab allocations use it. 
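arena_slab_alloc() and arena_slab_alloc_hard() collapse into a single pa_alloc() call; the dirty -> muzzy -> retained fallback now lives inside pa_alloc(), and the caller only inspects mapped_add for stats purposes. The call site boils down to (distilled from the diff below):

    bool zero = false;
    size_t mapped_add = 0;
    edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size,
        PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add);
    if (config_stats && slab != NULL && mapped_add != 0) {
            pa_shard_stats_mapped_add(tsdn, &arena->pa_shard,
                bin_info->slab_size);
    }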
--- src/arena.c | 48 ++++++++---------------------------------------- src/pa.c | 4 +--- 2 files changed, 9 insertions(+), 43 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1e3ae6e..c3365a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -443,11 +443,6 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, arena_large_malloc_stats_update(tsdn, arena, usize); } -static bool -arena_may_have_muzzy(arena_t *arena) { - return arena_muzzy_decay_ms_get(arena) != 0; -} - edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { @@ -1007,50 +1002,23 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { } static edata_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - const bin_info_t *bin_info, szind_t szind) { - edata_t *slab; - bool zero; - +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - zero = false; - slab = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, - true, szind, &zero); + bool zero = false; + size_t mapped_add = 0; - if (config_stats && slab != NULL) { + edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, + PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); + if (config_stats && slab != NULL && mapped_add != 0) { pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, bin_info->slab_size); } - return slab; -} - -static edata_t * -arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, - const bin_info_t *bin_info) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - ehooks_t *ehooks = arena_get_ehooks(arena); - szind_t szind = sz_size2index(bin_info->reg_size); - bool zero = false; - edata_t *slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, - true, binind, &zero); - if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, - PAGE, true, binind, &zero); - } if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, - szind); - if (slab == NULL) { - return NULL; - } + return NULL; } assert(edata_slab_get(slab)); diff --git a/src/pa.c b/src/pa.c index 072d485..0dbf044 100644 --- a/src/pa.c +++ b/src/pa.c @@ -68,6 +68,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + *mapped_add = 0; ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -78,7 +79,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, NULL, size, alignment, slab, szind, zero); } - if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, @@ -91,8 +91,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ *mapped_add = size; } - } else if (config_stats) { - *mapped_add = 0; } return edata; } -- cgit v0.12 From 0880c2ab9756ddb59b55dea673b20bd80922b487 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 14:38:55 -0700 Subject: PA: Have large expands use it. 
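large_ralloc_no_move_expand() no longer open-codes the three-ecache trail allocation plus merge; pa_expand() does that and reports, via mapped_add, whether new mapping was created. With the signature introduced here (the next two commits in the series adjust it further), the caller shrinks to roughly:

    bool is_zeroed_trail = zero;
    size_t mapped_add;
    bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize,
        &is_zeroed_trail, &mapped_add);
    if (err) {
            return true;
    }
    if (config_stats && mapped_add > 0) {
            pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, mapped_add);
    }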
--- include/jemalloc/internal/pa.h | 9 +++++--- src/large.c | 51 +++++++++--------------------------------- src/pa.c | 40 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e1821e6..0df2b4b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,7 +119,10 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); -edata_t * -pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero, size_t *mapped_add); +edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, + size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); +/* Returns true on error, in which case nothing changed. */ +bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + size_t new_usize, bool *zero, size_t *mapped_add); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index 26a1740..ff43a8d 100644 --- a/src/large.c +++ b/src/large.c @@ -101,57 +101,28 @@ static bool large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); - size_t oldusize = edata_usize_get(edata); - ehooks_t *ehooks = arena_get_ehooks(arena); - size_t trailsize = usize - oldusize; - - if (ehooks_merge_will_fail(ehooks)) { - return true; - } if (config_fill && unlikely(opt_zero)) { zero = true; } + + size_t old_usize = edata_usize_get(edata); + /* * Copy zero into is_zeroed_trail and pass the copy when allocating the * extent, so that it is possible to make correct zero fill decisions * below, even if is_zeroed_trail ends up true when zero is false. */ bool is_zeroed_trail = zero; - edata_t *trail; - bool new_mapping; - if ((trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { - if (config_stats) { - new_mapping = false; - } - } else { - if ((trail = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, edata_past_get(edata), - trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) - == NULL) { - return true; - } - if (config_stats) { - new_mapping = true; - } - } - - if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, - edata, trail)) { - extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, trail); + size_t mapped_add; + bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, + &is_zeroed_trail, &mapped_add); + if (err) { return true; } - szind_t szind = sz_size2index(usize); - emap_remap(tsdn, &emap_global, edata, szind, false); - - if (config_stats && new_mapping) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, trailsize); + if (config_stats && mapped_add > 0) { + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, mapped_add); } if (zero) { @@ -164,7 +135,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * of CACHELINE in [0 .. PAGE). 
*/ void *zbase = (void *) - ((uintptr_t)edata_addr_get(edata) + oldusize); + ((uintptr_t)edata_addr_get(edata) + old_usize); void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + PAGE)); size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; @@ -173,7 +144,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } assert(is_zeroed_trail); } - arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); + arena_extent_ralloc_large_expand(tsdn, arena, edata, old_usize); return false; } diff --git a/src/pa.c b/src/pa.c index 0dbf044..7c3b568 100644 --- a/src/pa.c +++ b/src/pa.c @@ -94,3 +94,43 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, } return edata; } + +bool +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, + bool *zero, size_t *mapped_add) { + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + size_t old_usize = edata_usize_get(edata); + size_t trail_size = new_usize - old_usize; + void *trail_begin = edata_past_get(edata); + + *mapped_add = 0; + if (ehooks_merge_will_fail(ehooks)) { + return true; + } + edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, + trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, zero); + if (trail == NULL) { + trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, + trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, + zero); + } + if (trail == NULL) { + trail = ecache_alloc_grow(tsdn, shard, ehooks, + &shard->ecache_retained, trail_begin, trail_size, PAGE, + /* slab */ false, SC_NSIZES, zero); + *mapped_add = trail_size; + } + if (trail == NULL) { + *mapped_add = 0; + return true; + } + if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, + trail)) { + extent_dalloc_wrapper(tsdn, shard, ehooks, trail); + *mapped_add = 0; + return true; + } + szind_t szind = sz_size2index(new_usize); + emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + return false; +} -- cgit v0.12 From 5bcc2c2ab9b46cc15c1bc054a74615daabfd3675 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 17:21:04 -0700 Subject: PA: Have expand take szind and slab. This isn't really necessary, but having a uniform API will help us later. --- include/jemalloc/internal/pa.h | 5 +++-- src/large.c | 3 ++- src/pa.c | 5 ++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0df2b4b..ef140b3 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -122,7 +122,8 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. 
*/ -bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - size_t new_usize, bool *zero, size_t *mapped_add); +bool +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, + szind_t szind, bool slab, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index ff43a8d..60b51d8 100644 --- a/src/large.c +++ b/src/large.c @@ -115,8 +115,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, */ bool is_zeroed_trail = zero; size_t mapped_add; + szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, - &is_zeroed_trail, &mapped_add); + szind, /* slab */ false, &is_zeroed_trail, &mapped_add); if (err) { return true; } diff --git a/src/pa.c b/src/pa.c index 7c3b568..7fafa7e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -97,7 +97,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - bool *zero, size_t *mapped_add) { + szind_t szind, bool slab, bool *zero, size_t *mapped_add) { ehooks_t *ehooks = pa_shard_ehooks_get(shard); size_t old_usize = edata_usize_get(edata); size_t trail_size = new_usize - old_usize; @@ -130,7 +130,6 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, *mapped_add = 0; return true; } - szind_t szind = sz_size2index(new_usize); - emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } -- cgit v0.12 From 74958567a4fb1917cc6c1e9d5ee98378a8781f1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 17:27:31 -0700 Subject: PA: have expand take sizes instead of new usize. This avoids involving usize, which makes some of the stats modifications more intuitively correct. --- include/jemalloc/internal/pa.h | 5 ++--- src/large.c | 4 +++- src/pa.c | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ef140b3..a4f8081 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -122,8 +122,7 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. 
*/ -bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - szind_t szind, bool slab, bool *zero, size_t *mapped_add); +bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index 60b51d8..c01b057 100644 --- a/src/large.c +++ b/src/large.c @@ -106,7 +106,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, zero = true; } + size_t old_size = edata_size_get(edata); size_t old_usize = edata_usize_get(edata); + size_t new_size = usize + sz_large_pad; /* * Copy zero into is_zeroed_trail and pass the copy when allocating the @@ -116,7 +118,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; size_t mapped_add; szind_t szind = sz_size2index(usize); - bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, + bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, szind, /* slab */ false, &is_zeroed_trail, &mapped_add); if (err) { return true; diff --git a/src/pa.c b/src/pa.c index 7fafa7e..8f33d9a 100644 --- a/src/pa.c +++ b/src/pa.c @@ -96,29 +96,31 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, } bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - szind_t szind, bool slab, bool *zero, size_t *mapped_add) { +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { + assert(new_size > old_size); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); - size_t old_usize = edata_usize_get(edata); - size_t trail_size = new_usize - old_usize; void *trail_begin = edata_past_get(edata); + size_t expand_amount = new_size - old_size; *mapped_add = 0; if (ehooks_merge_will_fail(ehooks)) { return true; } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, zero); + trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, + zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, - zero); + trail_begin, expand_amount, PAGE, /* slab */ false, + SC_NSIZES, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, trail_begin, trail_size, PAGE, + &shard->ecache_retained, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, zero); - *mapped_add = trail_size; + *mapped_add = expand_amount; } if (trail == NULL) { *mapped_add = 0; -- cgit v0.12 From 71fc0dc968189e72a4437fb38759ef380a02a7ab Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 11:36:38 -0700 Subject: PA: Move in remaining page allocation functions. 
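The caller-side pattern: page-level deallocation goes through the new
pa_dalloc()/pa_shrink() entry points, which report whether they produced
dirty pages via a generated_dirty out-parameter; deciding when to decay
remains the arena's job. In sketch form (mirroring the arena_slab_dalloc()
and large_dalloc_finish_impl() hunks below):

    bool generated_dirty;
    pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty);
    if (generated_dirty) {
            /* The arena, not the PA shard, schedules decay. */
            arena_handle_new_dirty_pages(tsdn, arena);
    }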
--- include/jemalloc/internal/arena_externs.h | 3 +-- include/jemalloc/internal/pa.h | 16 ++++++++++++++ src/arena.c | 18 +++++++--------- src/large.c | 36 +++++++++++++++---------------- src/pa.c | 32 +++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8548b1f..cdbfa4b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -27,8 +27,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); -void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata); +void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a4f8081..df2e88f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,10 +119,26 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); +/* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); +/* + * The same. Sets *generated_dirty to true if we produced new dirty pages, and + * false otherwise. + */ +bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *generated_dirty); +/* + * Frees the given edata back to the pa. Sets *generated_dirty if we produced + * new dirty pages (well, we alwyas set it for now; but this need not be the + * case). + * (We could make generated_dirty the return value of course, but this is more + * consistent with the shrink pathway and our error codes here). 
+ */ +void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *generated_dirty); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index c3365a1..35fefeb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -276,14 +276,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } } -void -arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { +void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -636,7 +632,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool all, edata_list_t *decay_extents, bool is_background_thread) { + bool all, edata_list_t *decay_extents) { size_t nmadvise, nunmapped; size_t npurged; @@ -728,8 +724,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - decay_stats, ecache, all, &decay_extents, - is_background_thread); + decay_stats, ecache, all, &decay_extents); assert(npurged == npurge); } @@ -805,8 +800,11 @@ void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); - ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); + bool generated_dirty; + pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } } static void diff --git a/src/large.c b/src/large.c index c01b057..2b913d6 100644 --- a/src/large.c +++ b/src/large.c @@ -69,30 +69,27 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { arena_t *arena = arena_get_from_edata(edata); - size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t diff = edata_size_get(edata) - (usize + sz_large_pad); + size_t old_size = edata_size_get(edata); + size_t old_usize = edata_usize_get(edata); - assert(oldusize > usize); + assert(old_usize > usize); if (ehooks_split_will_fail(ehooks)) { return true; } - /* Split excess pages. 
*/ - if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, - &arena->pa_shard.edata_cache, ehooks, edata, - usize + sz_large_pad, sz_size2index(usize), false, diff, - SC_NSIZES, false); - if (trail == NULL) { - return true; - } - - arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); + bool generated_dirty; + bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, + usize + sz_large_pad, sz_size2index(usize), false, + &generated_dirty); + if (err) { + return true; } - - arena_extent_ralloc_large_shrink(tsdn, arena, edata, oldusize); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } + arena_extent_ralloc_large_shrink(tsdn, arena, edata, old_usize); return false; } @@ -275,8 +272,11 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, edata); + bool generated_dirty; + pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } } void diff --git a/src/pa.c b/src/pa.c index 8f33d9a..dfbff22 100644 --- a/src/pa.c +++ b/src/pa.c @@ -99,6 +99,7 @@ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { assert(new_size > old_size); + assert(edata_size_get(edata) == old_size); ehooks_t *ehooks = pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); @@ -135,3 +136,34 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } + +bool +pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { + assert(new_size < old_size); + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + *generated_dirty = false; + + if (ehooks_split_will_fail(ehooks)) { + return true; + } + + edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, + edata, new_size, szind, slab, old_size - new_size, SC_NSIZES, + false); + if (trail == NULL) { + return true; + } + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + *generated_dirty = true; + return false; +} + +void +pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *generated_dirty) { + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + *generated_dirty = true; +} -- cgit v0.12 From 655a09634347628abc6720ad1e2b6e1d08fdf8d9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 08:14:25 -0700 Subject: Move bg inactivity check out of purge inner loop. I.e. do it once per call to arena_decay_stashed instead of once per muzzy purge. 
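The check now runs once at the end of arena_decay_to_limit() instead of
once per extent demoted from dirty to muzzy. Roughly (matching the hunk
below):

    if (npurge != 0) {
            size_t npurged = arena_decay_stashed(tsdn, arena, ehooks,
                decay, decay_stats, ecache, all, &decay_extents);
            assert(npurged == npurge);
    }
    arena_background_thread_inactivity_check(tsdn, arena,
        is_background_thread);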
--- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 35fefeb..da3fa5c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -660,8 +660,6 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata))) { ecache_dalloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata); - arena_background_thread_inactivity_check(tsdn, - arena, is_background_thread); break; } JEMALLOC_FALLTHROUGH; @@ -727,6 +725,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, decay_stats, ecache, all, &decay_extents); assert(npurged == npurge); } + arena_background_thread_inactivity_check(tsdn, arena, + is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; -- cgit v0.12 From aef28b2f8fc4031f970896b312127cda00bbc2d0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 12:00:45 -0700 Subject: PA: Move in stash_decayed. --- include/jemalloc/internal/pa.h | 5 ++++- src/arena.c | 21 +-------------------- src/pa.c | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index df2e88f..99f1608 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -77,7 +77,7 @@ struct pa_shard_s { * Decay-based purging state, responsible for scheduling extent state * transitions. * - * Synchronization: internal. + * Synchronization: via the internal mutex. */ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ @@ -141,4 +141,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); +size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index da3fa5c..efdda70 100644 --- a/src/arena.c +++ b/src/arena.c @@ -611,25 +611,6 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, } static size_t -arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, ecache_t *ecache, size_t npages_limit, - size_t npages_decay_max, edata_list_t *decay_extents) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Stash extents according to npages_limit. 
*/ - size_t nstashed = 0; - edata_t *edata; - while (nstashed < npages_decay_max && - (edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, ecache, npages_limit)) - != NULL) { - edata_list_append(decay_extents, edata); - nstashed += edata_size_get(edata) >> LG_PAGE; - } - return nstashed; -} - -static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, edata_list_t *decay_extents) { @@ -718,7 +699,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, ecache, + size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, diff --git a/src/pa.c b/src/pa.c index dfbff22..d6fb473 100644 --- a/src/pa.c +++ b/src/pa.c @@ -167,3 +167,24 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); *generated_dirty = true; } + +size_t +pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + /* Stash extents according to npages_limit. */ + size_t nstashed = 0; + while (nstashed < npages_decay_max) { + edata_t *edata = ecache_evict(tsdn, shard, ehooks, ecache, + npages_limit); + if (edata == NULL) { + break; + } + edata_list_append(decay_extents, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; + } + return nstashed; +} -- cgit v0.12 From 3034f4a508524e995864e485f03da3fb2792856a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 12:23:09 -0700 Subject: PA: Move in decay_stashed. 
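Purging remains a two-phase operation: pa_stash_decayed() evicts extents
from the ecache onto a private list, then pa_decay_stashed() purges or
demotes everything on that list. After this patch arena_decay_to_limit()
drives the flow roughly as follows (see the arena.c hunk below):

    edata_list_t decay_extents;
    edata_list_init(&decay_extents);
    size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache,
        npages_limit, npages_decay_max, &decay_extents);
    if (npurge != 0) {
            size_t npurged = pa_decay_stashed(tsdn, &arena->pa_shard,
                decay, decay_stats, ecache, /* fully_decay */ all,
                &decay_extents);
            assert(npurged == npurge);
    }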
--- include/jemalloc/internal/pa.h | 5 ++- src/arena.c | 71 ++---------------------------------------- src/pa.c | 68 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 73 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 99f1608..0c2294e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -142,6 +142,9 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents); + size_t npages_limit, size_t npages_decay_max, edata_list_t *result); +size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index efdda70..a378ba0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -610,69 +610,6 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_ms); } -static size_t -arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool all, edata_list_t *decay_extents) { - size_t nmadvise, nunmapped; - size_t npurged; - - if (config_stats) { - nmadvise = 0; - nunmapped = 0; - } - npurged = 0; - - ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - if (config_stats) { - nmadvise++; - } - size_t npages = edata_size_get(edata) >> LG_PAGE; - npurged += npages; - edata_list_remove(decay_extents, edata); - switch (ecache->state) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - if (!all && muzzy_decay_ms != 0 && - !extent_purge_lazy_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata))) { - ecache_dalloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, edata); - break; - } - JEMALLOC_FALLTHROUGH; - case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, - edata); - if (config_stats) { - nunmapped += npages; - } - break; - case extent_state_retained: - default: - not_reached(); - } - } - - if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->npurge, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->nmadvise, nmadvise); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->purged, npurged); - locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); - } - - return npurged; -} - /* * npages_limit: Decay at most npages_decay_max pages without violating the * invariant: (ecache_npages_get(ecache) >= npages_limit). 
We need an upper @@ -694,16 +631,14 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - ehooks_t *ehooks = arena_get_ehooks(arena); - edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - decay_stats, ecache, all, &decay_extents); + size_t npurged = pa_decay_stashed(tsdn, &arena->pa_shard, + decay, decay_stats, ecache, /* fully_decay */all, + &decay_extents); assert(npurged == npurge); } arena_background_thread_inactivity_check(tsdn, arena, diff --git a/src/pa.c b/src/pa.c index d6fb473..34177eb 100644 --- a/src/pa.c +++ b/src/pa.c @@ -170,7 +170,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { + size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -183,8 +183,72 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(decay_extents, edata); + edata_list_append(result, edata); nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; } + +size_t +pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents) { + bool err; + + size_t nmadvise = 0; + size_t nunmapped = 0; + size_t npurged = 0; + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); + + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; edata = edata_list_first(decay_extents)) { + edata_list_remove(decay_extents, edata); + + size_t size = edata_size_get(edata); + size_t npages = size >> LG_PAGE; + + nmadvise++; + npurged += npages; + + switch (ecache->state) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + if (try_muzzy) { + err = extent_purge_lazy_wrapper(tsdn, ehooks, + edata, /* offset */ 0, size); + if (!err) { + ecache_dalloc(tsdn, shard, ehooks, + &shard->ecache_muzzy, edata); + break; + } + } + JEMALLOC_FALLTHROUGH; + case extent_state_muzzy: + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + nunmapped += npages; + break; + case extent_state_retained: + default: + not_reached(); + } + } + + if (config_stats) { + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->nmadvise, nmadvise); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->purged, npurged); + locked_dec_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, nunmapped << LG_PAGE); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + } + + return npurged; +} -- cgit v0.12 From 103f5feda598ec5bd857db8d2f072724ef82ef46 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 13:12:12 -0700 Subject: Move bg thread activity check out of purging core. 
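The forced-decay ("all") path in arena_decay_impl() now looks roughly like
this, with the inactivity check performed after the purge completes rather
than inside the purging core (see the hunk below):

    if (all) {
            assert(!is_background_thread);
            malloc_mutex_lock(tsdn, &decay->mtx);
            arena_decay_to_limit(tsdn, arena, decay, decay_stats,
                ecache, all, 0, ecache_npages_get(ecache),
                is_background_thread);
            malloc_mutex_unlock(tsdn, &decay->mtx);
            arena_background_thread_inactivity_check(tsdn, arena,
                /* is_background_thread */ false);
            return false;
    }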
--- src/arena.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index a378ba0..b92bb80 100644 --- a/src/arena.c +++ b/src/arena.c @@ -641,8 +641,6 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, &decay_extents); assert(npurged == npurge); } - arena_background_thread_inactivity_check(tsdn, arena, - is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; @@ -653,10 +651,25 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { + assert(!is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, all, 0, ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); + /* + * The previous pa_decay_to_limit call may not have actually + * decayed all pages, if new pages were added concurrently with + * the purge. + * + * I don't think we need an activity check for that case (some + * other thread must be deallocating, and they should do one), + * but we do one anyways. This line comes out of a refactoring + * diff in which the check was pulled out of the callee, and so + * an extra redundant check minimizes the change. We should + * reevaluate. + */ + arena_background_thread_inactivity_check(tsdn, arena, + /* is_background_thread */ false); return false; } -- cgit v0.12 From f012c43be0c5a43267e145b05e69b974b60f5917 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 13:29:44 -0700 Subject: PA: Move in decay_to_limit --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 56 +++++++----------------------------------- src/pa.c | 35 ++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0c2294e..ecaadbe 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -146,5 +146,8 @@ size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_t *decay_extents); +void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index b92bb80..dddb083 100644 --- a/src/arena.c +++ b/src/arena.c @@ -56,9 +56,6 @@ static unsigned huge_arena_ind; * definition. 
*/ -static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, - size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, @@ -514,9 +511,9 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, - false, npages_limit, current_npages - npages_limit, - is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, + ecache, /* fully_decay */ false, npages_limit, + current_npages - npages_limit); } } @@ -530,9 +527,9 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, ssize_t decay_ms = decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, decay_stats, - ecache, false, 0, ecache_npages_get(ecache), - is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, + decay_stats, ecache, /* fully_decay */ false, 0, + ecache_npages_get(ecache)); } return false; } @@ -610,42 +607,6 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_ms); } -/* - * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper - * bound on number of pages in order to prevent unbounded growth (namely in - * stashed), otherwise unbounded new pages could be added to extents during the - * current decay run, so that the purging thread never finishes. 
- */ -static void -arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, - size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - if (decay->purging || npages_decay_max == 0) { - return; - } - decay->purging = true; - malloc_mutex_unlock(tsdn, &decay->mtx); - - edata_list_t decay_extents; - edata_list_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, - npages_limit, npages_decay_max, &decay_extents); - if (npurge != 0) { - size_t npurged = pa_decay_stashed(tsdn, &arena->pa_shard, - decay, decay_stats, ecache, /* fully_decay */all, - &decay_extents); - assert(npurged == npurge); - } - - malloc_mutex_lock(tsdn, &decay->mtx); - decay->purging = false; -} - static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -653,8 +614,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (all) { assert(!is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, - all, 0, ecache_npages_get(ecache), is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, + ecache, /* fully_decay */ all, 0, + ecache_npages_get(ecache)); malloc_mutex_unlock(tsdn, &decay->mtx); /* * The previous pa_decay_to_limit call may not have actually diff --git a/src/pa.c b/src/pa.c index 34177eb..eda1a0b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -252,3 +252,38 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return npurged; } + +/* + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. + */ +void +pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 1); + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + if (decay->purging || npages_decay_max == 0) { + return; + } + decay->purging = true; + malloc_mutex_unlock(tsdn, &decay->mtx); + + edata_list_t decay_extents; + edata_list_init(&decay_extents); + size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, + npages_decay_max, &decay_extents); + if (npurge != 0) { + size_t npurged = pa_decay_stashed(tsdn, shard, decay, + decay_stats, ecache, fully_decay, &decay_extents); + assert(npurged == npurge); + } + + malloc_mutex_lock(tsdn, &decay->mtx); + decay->purging = false; +} -- cgit v0.12 From 65698b7f2e3613be8e848053213a850dd5a2cf92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 14:13:01 -0700 Subject: PA: Remove public visibility of some internals. 
--- include/jemalloc/internal/pa.h | 5 ----- src/pa.c | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ecaadbe..ff5924c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -141,11 +141,6 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result); -size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents); void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max); diff --git a/src/pa.c b/src/pa.c index eda1a0b..7a84cb0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -168,7 +168,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, *generated_dirty = true; } -size_t +static size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -189,7 +189,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, return nstashed; } -size_t +static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_t *decay_extents) { -- cgit v0.12 From 2d6eec7b5cc2a537e5ff702778c0c15832b5f961 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 14:56:05 -0700 Subject: PA: Move in decay-all pathway. 
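As of this patch pa_decay_all() is a thin wrapper that takes decay->mtx
around a full-ecache pa_decay_to_limit() call (a later cleanup in this
series shifts the locking to the caller and turns it into an ownership
assertion). Sketch, matching the pa.c hunk below:

    void
    pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay,
        pa_shard_decay_stats_t *decay_stats, ecache_t *ecache,
        bool fully_decay) {
            malloc_mutex_lock(tsdn, &decay->mtx);
            pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache,
                fully_decay, 0, ecache_npages_get(ecache));
            malloc_mutex_unlock(tsdn, &decay->mtx);
    }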
--- include/jemalloc/internal/pa.h | 2 ++ src/arena.c | 10 +++------- src/pa.c | 9 +++++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ff5924c..db04aa0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -144,5 +144,7 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max); +void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index dddb083..7c65c5c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -612,12 +612,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { - assert(!is_background_thread); - malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, - ecache, /* fully_decay */ all, 0, - ecache_npages_get(ecache)); - malloc_mutex_unlock(tsdn, &decay->mtx); + pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, + /* fully_decay */ all); /* * The previous pa_decay_to_limit call may not have actually * decayed all pages, if new pages were added concurrently with @@ -630,9 +626,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * an extra redundant check minimizes the change. We should * reevaluate. */ + assert(!is_background_thread); arena_background_thread_inactivity_check(tsdn, arena, /* is_background_thread */ false); - return false; } diff --git a/src/pa.c b/src/pa.c index 7a84cb0..711b824 100644 --- a/src/pa.c +++ b/src/pa.c @@ -287,3 +287,12 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; } + +void +pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + malloc_mutex_lock(tsdn, &decay->mtx); + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, + fully_decay, 0, ecache_npages_get(ecache)); + malloc_mutex_unlock(tsdn, &decay->mtx); +} -- cgit v0.12 From 46a9d7fc0b0e5124cc8a1ca0e3caec85968a6842 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 15:42:29 -0700 Subject: PA: Move in rest of purging. --- include/jemalloc/internal/pa.h | 14 ++++++++ src/arena.c | 79 +++++++++++++----------------------------- src/pa.c | 51 +++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index db04aa0..d99b9b7 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,6 +7,13 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/lockedint.h" +enum pa_decay_purge_setting_e { + PA_DECAY_PURGE_ALWAYS, + PA_DECAY_PURGE_NEVER, + PA_DECAY_PURGE_ON_EPOCH_ADVANCE +}; +typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; + /* * The page allocator; responsible for acquiring pages of memory for * allocations. 
@@ -147,4 +154,11 @@ void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* Returns true if the epoch advanced. */ +bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pa_decay_purge_setting_t decay_purge_setting); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 7c65c5c..d1e6136 100644 --- a/src/arena.c +++ b/src/arena.c @@ -506,56 +506,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, arena_nactive_add(arena, udiff >> LG_PAGE); } -static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit, bool is_background_thread) { - if (current_npages > npages_limit) { - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, - ecache, /* fully_decay */ false, npages_limit, - current_npages - npages_limit); - } -} - -static bool -arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool is_background_thread) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = decay_ms_read(decay); - if (decay_ms <= 0) { - if (decay_ms == 0) { - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, - decay_stats, ecache, /* fully_decay */ false, 0, - ecache_npages_get(ecache)); - } - return false; - } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances, or - * being triggered by background threads (scheduled event). - */ - nstime_t time; - nstime_init_update(&time); - size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); - if (is_background_thread || - (epoch_advanced && !background_thread_enabled())) { - size_t npages_limit = decay_npages_limit_get(decay); - arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - npages_current, npages_limit, is_background_thread); - } - - return epoch_advanced; -} - ssize_t arena_dirty_decay_ms_get(arena_t *arena) { return pa_shard_dirty_decay_ms_get(&arena->pa_shard); @@ -566,6 +516,22 @@ arena_muzzy_decay_ms_get(arena_t *arena) { return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); } +/* + * In situations where we're not forcing a decay (i.e. because the user + * specifically requested it), should we purge ourselves, or wait for the + * background thread to get to it. 
+ */ +static pa_decay_purge_setting_t +arena_decide_unforced_decay_purge_setting(bool is_background_thread) { + if (is_background_thread) { + return PA_DECAY_PURGE_ALWAYS; + } else if (!is_background_thread && background_thread_enabled()) { + return PA_DECAY_PURGE_NEVER; + } else { + return PA_DECAY_PURGE_ON_EPOCH_ADVANCE; + } +} + static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { @@ -585,7 +551,11 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, nstime_t cur_time; nstime_init_update(&cur_time); decay_reinit(decay, &cur_time, decay_ms); - arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); + pa_decay_purge_setting_t decay_purge = + arena_decide_unforced_decay_purge_setting( + /* is_background_thread */ false); + pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, + decay_purge); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -636,9 +606,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, decay_stats, - ecache, is_background_thread); + pa_decay_purge_setting_t decay_purge = + arena_decide_unforced_decay_purge_setting(is_background_thread); + bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, + decay, decay_stats, ecache, decay_purge); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ diff --git a/src/pa.c b/src/pa.c index 711b824..06c205c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -296,3 +296,54 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, fully_decay, 0, ecache_npages_get(ecache)); malloc_mutex_unlock(tsdn, &decay->mtx); } + +static void +pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit) { + if (current_npages > npages_limit) { + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, + /* fully_decay */ false, npages_limit, + current_npages - npages_limit); + } +} + +bool +pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pa_decay_purge_setting_t decay_purge_setting) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + /* Purge all or nothing if the option is disabled. */ + ssize_t decay_ms = decay_ms_read(decay); + if (decay_ms <= 0) { + if (decay_ms == 0) { + pa_decay_to_limit(tsdn, shard, decay, decay_stats, + ecache, /* fully_decay */ false, + /* npages_limit */ 0, ecache_npages_get(ecache)); + } + return false; + } + + /* + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). 
+ */ + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (decay_purge_setting == PA_DECAY_PURGE_ALWAYS + || (epoch_advanced && decay_purge_setting + == PA_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + size_t npages_limit = decay_npages_limit_get(decay); + pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, + npages_current, npages_limit); + } + + return epoch_advanced; +} -- cgit v0.12 From c075fd0bcb4a4de13204d26ff400bd315811e435 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 16:13:36 -0700 Subject: PA: Minor cleanups and comment fixes. --- include/jemalloc/internal/pa.h | 35 +++++++++++++++++++++++++++++------ src/arena.c | 8 +++++--- src/pa.c | 10 ++++------ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d99b9b7..9636ced 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -56,6 +56,21 @@ struct pa_shard_stats_s { atomic_zu_t abandoned_vm; }; +/* + * The local allocator handle. Keeps the state necessary to satisfy page-sized + * allocations. + * + * The contents are mostly internal to the PA module. The key exception is that + * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches + * decay_ts, for a couple of queries, passing them back to a PA function, or + * acquiring decay.mtx and looking at decay.purging. The reasoning is that, + * while PA decides what and how to purge, the arena code decides when and where + * (e.g. on what thread). It's allowed to use the presence of another purger to + * decide. + * (The background thread code also touches some other decay internals, but + * that's not fundamental; its' just an artifact of a partial refactoring, and + * its accesses could be straightforwardly moved inside the decay module). + */ typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { /* @@ -148,15 +163,23 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - size_t npages_limit, size_t npages_decay_max); -void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* + * All purging functions require holding decay->mtx. This is one of the few + * places external modules are allowed to peek inside pa_shard_t internals. + */ +/* + * Decays the number of pages currently in the ecache. This might not leave the + * ecache empty if other threads are inserting dirty objects into it + * concurrently with the call. + */ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); -/* Returns true if the epoch advanced. */ +/* + * Updates decay settings for the current time, and conditionally purges in + * response (depending on decay_purge_setting). Returns whether or not the + * epoch advanced. 
+ */ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); diff --git a/src/arena.c b/src/arena.c index d1e6136..25fad27 100644 --- a/src/arena.c +++ b/src/arena.c @@ -582,12 +582,14 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { + malloc_mutex_lock(tsdn, &decay->mtx); pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, /* fully_decay */ all); + malloc_mutex_unlock(tsdn, &decay->mtx); /* - * The previous pa_decay_to_limit call may not have actually - * decayed all pages, if new pages were added concurrently with - * the purge. + * The previous pa_decay_all call may not have actually decayed + * all pages, if new pages were added concurrently with the + * purge. * * I don't think we need an activity check for that case (some * other thread must be deallocating, and they should do one), diff --git a/src/pa.c b/src/pa.c index 06c205c..d9eeb69 100644 --- a/src/pa.c +++ b/src/pa.c @@ -260,13 +260,12 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. */ -void +static void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &decay->mtx); if (decay->purging || npages_decay_max == 0) { return; @@ -291,10 +290,9 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { - malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, - fully_decay, 0, ecache_npages_get(ecache)); - malloc_mutex_unlock(tsdn, &decay->mtx); + malloc_mutex_assert_owner(tsdn, &decay->mtx); + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, + /* npages_limit */ 0, ecache_npages_get(ecache)); } static void -- cgit v0.12 From 527dd4cdb8d1ec440fefe894ada4ccbc1c3e437d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 17:40:17 -0700 Subject: PA: Move in nactive counter. --- include/jemalloc/internal/arena_structs.h | 7 ------- include/jemalloc/internal/pa.h | 7 +++++++ src/arena.c | 29 +++-------------------------- src/ctl.c | 2 +- src/pa.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 49568fc..682450e 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -68,13 +68,6 @@ struct arena_s { atomic_u_t dss_prec; /* - * Number of pages in active extents. - * - * Synchronization: atomic. - */ - atomic_zu_t nactive; - - /* * Extant large allocations. * * Synchronization: large_mtx. 
diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9636ced..f0b7faa 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -74,6 +74,13 @@ struct pa_shard_stats_s { typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ + atomic_zu_t nactive; + + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. * diff --git a/src/arena.c b/src/arena.c index 25fad27..f288654 100644 --- a/src/arena.c +++ b/src/arena.c @@ -71,7 +71,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dss = dss_prec_names[arena_dss_prec_get(arena)]; *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); + *nactive += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); } @@ -136,7 +136,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); atomic_load_add_store_zu(&astats->resident, base_resident + - (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + + (((atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, @@ -387,17 +387,6 @@ arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { } static void -arena_nactive_add(arena_t *arena, size_t add_pages) { - atomic_fetch_add_zu(&arena->nactive, add_pages, ATOMIC_RELAXED); -} - -static void -arena_nactive_sub(arena_t *arena, size_t sub_pages) { - assert(atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) >= sub_pages); - atomic_fetch_sub_zu(&arena->nactive, sub_pages, ATOMIC_RELAXED); -} - -static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; @@ -457,7 +446,6 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_add(arena, esize >> LG_PAGE); } if (edata != NULL && sz_large_pad != 0) { @@ -475,35 +463,30 @@ arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { edata_usize_get(edata)); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); - size_t udiff = oldusize - usize; if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, udiff >> LG_PAGE); } void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); - size_t udiff = usize - oldusize; if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_add(arena, udiff >> LG_PAGE); } 
ssize_t @@ -658,8 +641,6 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); - bool generated_dirty; pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); if (generated_dirty) { @@ -801,7 +782,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } } - atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); + atomic_store_zu(&arena->pa_shard.nactive, 0, ATOMIC_RELAXED); } static void @@ -885,8 +866,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); - arena_nactive_add(arena, edata_size_get(slab) >> LG_PAGE); - return slab; } @@ -1637,8 +1616,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); - atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); - edata_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { diff --git a/src/ctl.c b/src/ctl.c index 9233c84..4350347 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3516,7 +3516,7 @@ experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) /* Expose the underlying counter for fast read. */ - pactivep = (size_t *)&(arena->nactive.repr); + pactivep = (size_t *)&(arena->pa_shard.nactive.repr); READ(pactivep, size_t *); ret = 0; #else diff --git a/src/pa.c b/src/pa.c index d9eeb69..d678d82 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +static void +pa_nactive_add(pa_shard_t *shard, size_t add_pages) { + atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); +} + +static void +pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { + assert(atomic_load_zu(&shard->nactive, ATOMIC_RELAXED) >= sub_pages); + atomic_fetch_sub_zu(&shard->nactive, sub_pages, ATOMIC_RELAXED); +} + bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { @@ -43,6 +54,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, } atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; shard->stats = stats; @@ -83,7 +95,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, szind, zero); - if (config_stats) { + if (config_stats && edata != NULL) { /* * edata may be NULL on OOM, but in that case mapped_add * isn't used below, so there's no need to conditionlly @@ -92,6 +104,9 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, *mapped_add = size; } } + if (edata != NULL) { + pa_nactive_add(shard, size >> LG_PAGE); + } return edata; } @@ -100,6 +115,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); ehooks_t *ehooks 
= pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); @@ -133,6 +149,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, *mapped_add = 0; return true; } + pa_nactive_add(shard, expand_amount >> LG_PAGE); emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } @@ -141,6 +158,9 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { assert(new_size < old_size); + assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); + size_t shrink_amount = old_size - new_size; ehooks_t *ehooks = pa_shard_ehooks_get(shard); *generated_dirty = false; @@ -150,11 +170,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, - edata, new_size, szind, slab, old_size - new_size, SC_NSIZES, + edata, new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { return true; } + pa_nactive_sub(shard, shrink_amount >> LG_PAGE); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); *generated_dirty = true; return false; @@ -163,6 +185,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { + pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); ehooks_t *ehooks = pa_shard_ehooks_get(shard); ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); *generated_dirty = true; @@ -345,3 +368,5 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } + + -- cgit v0.12 From f6bfa3dccaa9bb6bfe97aecc32709680b1d47652 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 17:59:06 -0700 Subject: Move extent stats to the PA module. While we're at it, make them non-atomic -- they are purely derived statistics (and in fact aren't even in the arena_t or pa_shard_t). --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_stats.h | 16 ----------- include/jemalloc/internal/ctl.h | 2 +- include/jemalloc/internal/pa.h | 17 +++++++++++ src/arena.c | 17 +++++------ src/ctl.c | 48 +++++++++++-------------------- 6 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cdbfa4b..2463495 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -26,7 +26,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - arena_stats_extents_t *estats); + pa_extent_stats_t *estats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 310b907..9dc9e5f 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -37,22 +37,6 @@ struct arena_stats_large_s { size_t curlextents; /* Derived. 
*/ }; -typedef struct arena_stats_extents_s arena_stats_extents_t; -struct arena_stats_extents_s { - /* - * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t. - * We track both bytes and # of extents: two extents in the same bucket - * may have different sizes if adjacent size classes differ by more than - * a page, so bytes cannot always be derived from # of extents. - */ - atomic_zu_t ndirty; - atomic_zu_t dirty_bytes; - atomic_zu_t nmuzzy; - atomic_zu_t muzzy_bytes; - atomic_zu_t nretained; - atomic_zu_t retained_bytes; -}; - /* * Arena stats. Note that fields marked "derived" are not directly maintained * within the arena code; rather their values are derived during stats merge diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 55a8ff4..e0b46fa 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -44,7 +44,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - arena_stats_extents_t estats[SC_NPSIZES]; + pa_extent_stats_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f0b7faa..acfad89 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -29,6 +29,23 @@ struct pa_shard_decay_stats_s { locked_u64_t purged; }; +typedef struct pa_extent_stats_s pa_extent_stats_t; +struct pa_extent_stats_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in the various + * ecache_ts. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. + */ + size_t ndirty; + size_t dirty_bytes; + size_t nmuzzy; + size_t muzzy_bytes; + size_t nretained; + size_t retained_bytes; +}; + /* * The stats for a particular pa_shard. 
Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges diff --git a/src/arena.c b/src/arena.c index f288654..2deafe6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - arena_stats_extents_t *estats) { + pa_extent_stats_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -200,15 +200,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, retained_bytes = ecache_nbytes_get( &arena->pa_shard.ecache_retained, i); - atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].retained_bytes, retained_bytes, - ATOMIC_RELAXED); + estats[i].ndirty = dirty; + estats[i].nmuzzy = muzzy; + estats[i].nretained = retained; + estats[i].dirty_bytes = dirty_bytes; + estats[i].muzzy_bytes = muzzy_bytes; + estats[i].retained_bytes = retained_bytes; } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index 4350347..1679867 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -804,7 +804,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(arena_stats_extents_t)); + sizeof(pa_extent_stats_t)); } } @@ -993,18 +993,16 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge extents stats. 
*/ for (i = 0; i < SC_NPSIZES; i++) { - ctl_accum_atomic_zu(&sdstats->estats[i].ndirty, - &astats->estats[i].ndirty); - ctl_accum_atomic_zu(&sdstats->estats[i].nmuzzy, - &astats->estats[i].nmuzzy); - ctl_accum_atomic_zu(&sdstats->estats[i].nretained, - &astats->estats[i].nretained); - ctl_accum_atomic_zu(&sdstats->estats[i].dirty_bytes, - &astats->estats[i].dirty_bytes); - ctl_accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, - &astats->estats[i].muzzy_bytes); - ctl_accum_atomic_zu(&sdstats->estats[i].retained_bytes, - &astats->estats[i].retained_bytes); + sdstats->estats[i].ndirty += astats->estats[i].ndirty; + sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy; + sdstats->estats[i].nretained + += astats->estats[i].nretained; + sdstats->estats[i].dirty_bytes + += astats->estats[i].dirty_bytes; + sdstats->estats[i].muzzy_bytes + += astats->estats[i].muzzy_bytes; + sdstats->estats[i].retained_bytes + += astats->estats[i].retained_bytes; } } } @@ -3150,29 +3148,17 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].ndirty, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].nretained, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); static const ctl_named_node_t * stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, -- cgit v0.12 From 3c28aa6f179421b23fd8795cbcaa4696aba99557 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:14:53 -0700 Subject: PA: Move edata_avail stat in, make it non-atomic. --- include/jemalloc/internal/arena_stats.h | 3 --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 5 ++--- src/ctl.c | 8 +++----- src/pa.c | 2 -- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 9dc9e5f..496d6e7 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -52,9 +52,6 @@ struct arena_stats_s { */ locked_zu_t retained; /* Derived. */ - /* Number of edata_t structs allocated by base, but not being used. */ - atomic_zu_t edata_avail; /* Derived. */ - atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. 
*/ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index acfad89..9da061b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -69,6 +69,9 @@ struct pa_shard_stats_s { */ locked_zu_t mapped; + /* Number of edata_t structs allocated by base, but not being used. */ + size_t edata_avail; /* Derived. */ + /* VM space had to be leaked (undocumented). Normally 0. */ atomic_zu_t abandoned_vm; }; diff --git a/src/arena.c b/src/arena.c index 2deafe6..025418d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -100,9 +100,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); - atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), - ATOMIC_RELAXED); + astats->pa_shard_stats.edata_avail = atomic_load_zu( + &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); /* Dirty decay stats */ locked_inc_u64_unsynchronized( diff --git a/src/ctl.c b/src/ctl.c index 1679867..e8687b5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -866,8 +866,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); - ctl_accum_atomic_zu(&sdstats->astats.edata_avail, - &astats->astats.edata_avail); + sdstats->astats.pa_shard_stats.edata_avail + += astats->astats.pa_shard_stats.edata_avail; } ctl_accum_locked_u64( @@ -2919,9 +2919,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, - ATOMIC_RELAXED), - size_t) + arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( diff --git a/src/pa.c b/src/pa.c index d678d82..d67c97e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -368,5 +368,3 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } - - -- cgit v0.12 From 436789ad96fcc4a091790b9d380ee31570efa6cf Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:37:15 -0700 Subject: PA: Make mapped stat atomic. We always have atomic_zu_t, and mapped/unmapped transitions are always expensive enough that trying to piggyback on a lock is a waste of time. --- include/jemalloc/internal/pa.h | 10 +--------- src/arena.c | 19 +++++++++---------- src/ctl.c | 11 ++++++----- src/extent.c | 5 ++--- src/large.c | 3 ++- src/pa.c | 4 ++-- 6 files changed, 22 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9da061b..0cf83cc 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -67,7 +67,7 @@ struct pa_shard_stats_s { * Partially derived -- we maintain our own counter, but add in the * base's own counter at merge. */ - locked_zu_t mapped; + atomic_zu_t mapped; /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. 
*/ @@ -135,14 +135,6 @@ struct pa_shard_s { base_t *base; }; -static inline void -pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); - locked_inc_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, size); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); -} - static inline ssize_t pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { return decay_ms_read(&shard->decay_dirty); diff --git a/src/arena.c b/src/arena.c index 025418d..2fe6904 100644 --- a/src/arena.c +++ b/src/arena.c @@ -90,16 +90,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); + size_t mapped = atomic_load_zu(&arena->pa_shard.stats->mapped, + ATOMIC_RELAXED); + atomic_load_add_store_zu(&astats->pa_shard_stats.mapped, + base_mapped + mapped); LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->pa_shard_stats.mapped, - base_mapped + locked_read_zu(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped)); locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); - astats->pa_shard_stats.edata_avail = atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); @@ -436,9 +435,9 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); if (mapped_add != 0) { - locked_inc_zu(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->mapped, mapped_add); + atomic_fetch_add_zu( + &arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -848,8 +847,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); if (config_stats && slab != NULL && mapped_add != 0) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, - bin_info->slab_size); + atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } if (slab == NULL) { diff --git a/src/ctl.c b/src/ctl.c index e8687b5..00afc76 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -861,7 +861,7 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_locked_zu( + ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.mapped, &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, @@ -1101,8 +1101,9 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); - ctl_stats->mapped = locked_read_atomic_zu( - &ctl_sarena->astats->astats.pa_shard_stats.mapped); + ctl_stats->mapped = atomic_load_zu( + &ctl_sarena->astats->astats.pa_shard_stats.mapped, + ATOMIC_RELAXED); ctl_stats->retained = locked_read_atomic_zu( &ctl_sarena->astats->astats.retained); @@ -2913,8 +2914,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - 
locked_read_atomic_zu(&arenas_i( - mib[2])->astats->astats.pa_shard_stats.mapped), size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) diff --git a/src/extent.c b/src/extent.c index 595916a..62ebff5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -966,10 +966,9 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, LOCKEDINT_MTX(*shard->stats_mtx), &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); - locked_dec_zu(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, extent_size); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + atomic_fetch_sub_zu(&shard->stats->mapped, extent_size, + ATOMIC_RELAXED); } } diff --git a/src/large.c b/src/large.c index 2b913d6..f61d1fe 100644 --- a/src/large.c +++ b/src/large.c @@ -122,7 +122,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } if (config_stats && mapped_add > 0) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, mapped_add); + atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } if (zero) { diff --git a/src/pa.c b/src/pa.c index d67c97e..e20eab9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -268,9 +268,9 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, &decay_stats->nmadvise, nmadvise); locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); - locked_dec_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + atomic_fetch_sub_zu(&shard->stats->mapped, nunmapped << LG_PAGE, + ATOMIC_RELAXED); } return npurged; -- cgit v0.12 From e2cf3fb1a3f064ba2c237620ca938e0e04c36d92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:49:15 -0700 Subject: PA: Move in all modifications of mapped. --- include/jemalloc/internal/pa.h | 4 ++-- src/arena.c | 15 ++------------- src/large.c | 8 +------- src/pa.c | 33 +++++++++++++++++---------------- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0cf83cc..1c84c8d 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -162,10 +162,10 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); + size_t alignment, bool slab, szind_t szind, bool *zero); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); + size_t new_size, szind_t szind, bool slab, bool *zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. 
diff --git a/src/arena.c b/src/arena.c index 2fe6904..c4bf29f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -424,21 +424,15 @@ edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { szind_t szind = sz_size2index(usize); - size_t mapped_add; size_t esize = usize + sz_large_pad; edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, &mapped_add); + /* slab */ false, szind, zero); if (edata != NULL) { if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); - if (mapped_add != 0) { - atomic_fetch_add_zu( - &arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -842,14 +836,9 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard WITNESS_RANK_CORE, 0); bool zero = false; - size_t mapped_add = 0; edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); - if (config_stats && slab != NULL && mapped_add != 0) { - atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } + PAGE, /* slab */ true, /* szind */ binind, &zero); if (slab == NULL) { return NULL; diff --git a/src/large.c b/src/large.c index f61d1fe..494a32b 100644 --- a/src/large.c +++ b/src/large.c @@ -113,19 +113,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * below, even if is_zeroed_trail ends up true when zero is false. */ bool is_zeroed_trail = zero; - size_t mapped_add; szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, &is_zeroed_trail, &mapped_add); + szind, /* slab */ false, &is_zeroed_trail); if (err) { return true; } - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } - if (zero) { if (config_cache_oblivious) { /* diff --git a/src/pa.c b/src/pa.c index e20eab9..10a4401 100644 --- a/src/pa.c +++ b/src/pa.c @@ -77,16 +77,17 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero, size_t *mapped_add) { + bool slab, szind_t szind, bool *zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - *mapped_add = 0; - ehooks_t *ehooks = pa_shard_ehooks_get(shard); + size_t mapped_add = 0; + ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); + if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, NULL, size, alignment, slab, szind, zero); @@ -95,24 +96,21 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, szind, zero); - if (config_stats && edata != NULL) { - /* - * edata may be NULL on OOM, but in that case mapped_add - * isn't used below, so there's no need to conditionlly - * set it to 0 here. 
- */ - *mapped_add = size; - } + mapped_add = size; } if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + ATOMIC_RELAXED); + } } return edata; } bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { + size_t new_size, szind_t szind, bool slab, bool *zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -121,7 +119,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void *trail_begin = edata_past_get(edata); size_t expand_amount = new_size - old_size; - *mapped_add = 0; + size_t mapped_add = 0; + if (ehooks_merge_will_fail(ehooks)) { return true; } @@ -137,18 +136,20 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, zero); - *mapped_add = expand_amount; + mapped_add = expand_amount; } if (trail == NULL) { - *mapped_add = 0; return true; } if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); - *mapped_add = 0; return true; } + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + ATOMIC_RELAXED); + } pa_nactive_add(shard, expand_amount >> LG_PAGE); emap_remap(tsdn, &emap_global, edata, szind, slab); return false; -- cgit v0.12 From d0c43217b5bbcf263a4505cad3eaeecc47ac6aa7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 19:24:05 -0700 Subject: Arena stats: Move retained to PA, use plain ints. Retained is a property of the allocated pages. The derived fields no longer require any locking; they're computed on demand. --- include/jemalloc/internal/arena_stats.h | 13 +++++----- include/jemalloc/internal/pa.h | 8 ++++++ src/arena.c | 20 +++++++++------ src/ctl.c | 43 ++++++++++++--------------------- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 496d6e7..3bfc858 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -45,17 +45,16 @@ struct arena_stats_large_s { typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { LOCKEDINT_MTX_DECLARE(mtx) + /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). + * resident includes the base stats -- that's why it lives here and not + * in pa_shard_stats_t. */ - locked_zu_t retained; /* Derived. */ + size_t base; /* Derived. */ + size_t resident; /* Derived. */ + size_t metadata_thp; /* Derived. */ - atomic_zu_t base; /* Derived. */ atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ locked_u64_t nmalloc_large; /* Derived. 
*/ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1c84c8d..f7abf1e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -61,6 +61,14 @@ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; pa_shard_decay_stats_t decay_muzzy; + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + size_t retained; /* Derived. */ + /* * Number of bytes currently mapped, excluding retained memory. * diff --git a/src/arena.c b/src/arena.c index c4bf29f..0fe85a9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -97,8 +97,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->retained, - ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); + astats->pa_shard_stats.retained += + ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; astats->pa_shard_stats.edata_avail = atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); @@ -130,13 +130,17 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->decay_muzzy.purged)); - atomic_load_add_store_zu(&astats->base, base_allocated); + astats->base += base_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); - atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); - atomic_load_add_store_zu(&astats->resident, base_resident + - (((atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED) + - ecache_npages_get(&arena->pa_shard.ecache_dirty) + - ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); + astats->metadata_thp += metadata_thp; + + size_t pa_resident_pgs = 0; + pa_resident_pgs + += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); + pa_resident_pgs + += ecache_npages_get(&arena->pa_shard.ecache_dirty); + astats->resident += base_resident + (pa_resident_pgs << LG_PAGE); + atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED)); diff --git a/src/ctl.c b/src/ctl.c index 00afc76..368eb5f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -682,12 +682,6 @@ ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { } static void -ctl_accum_locked_zu(locked_zu_t *dst, locked_zu_t *src) { - locked_inc_zu_unsynchronized(dst, - locked_read_atomic_zu(src)); -} - -static void ctl_accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); @@ -864,12 +858,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.mapped, &astats->astats.pa_shard_stats.mapped); - ctl_accum_locked_zu(&sdstats->astats.retained, - &astats->astats.retained); + sdstats->astats.pa_shard_stats.retained + += astats->astats.pa_shard_stats.retained; sdstats->astats.pa_shard_stats.edata_avail += astats->astats.pa_shard_stats.edata_avail; } + ctl_accum_locked_u64( &sdstats->astats.pa_shard_stats.decay_dirty.npurge, &astats->astats.pa_shard_stats.decay_dirty.npurge); @@ -898,14 +893,11 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, MUTEX_PROF_ARENA_MUTEXES #undef OP if 
(!destroyed) { - ctl_accum_atomic_zu(&sdstats->astats.base, - &astats->astats.base); + sdstats->astats.base += astats->astats.base; + sdstats->astats.resident += astats->astats.resident; + sdstats->astats.metadata_thp += astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); - ctl_accum_atomic_zu(&sdstats->astats.resident, - &astats->astats.resident); - ctl_accum_atomic_zu(&sdstats->astats.metadata_thp, - &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -1093,19 +1085,17 @@ ctl_refresh(tsdn_t *tsdn) { atomic_load_zu(&ctl_sarena->astats->astats.allocated_large, ATOMIC_RELAXED); ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); - ctl_stats->metadata = atomic_load_zu( - &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + + ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); - ctl_stats->metadata_thp = atomic_load_zu( - &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); - ctl_stats->resident = atomic_load_zu( - &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); + ctl_stats->resident = ctl_sarena->astats->astats.resident; + ctl_stats->metadata_thp = + ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = atomic_load_zu( &ctl_sarena->astats->astats.pa_shard_stats.mapped, ATOMIC_RELAXED); - ctl_stats->retained = locked_read_atomic_zu( - &ctl_sarena->astats->astats.retained); + ctl_stats->retained = + ctl_sarena->astats->astats.pa_shard_stats.retained; ctl_background_thread_stats_read(tsdn); @@ -2917,7 +2907,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), + arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) @@ -2949,19 +2939,18 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), + arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), + arenas_i(mib[2])->astats->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( -- cgit v0.12 From 565045ef716586f93caf6c210905419be9ed6e25 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 08:34:47 -0700 Subject: Arena: Make more derived stats non-atomic/locked. 
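The derived counters in arena_stats_t (allocated_large, nmalloc_large, and
friends) are written only while a single thread merges stats for a ctl
refresh, so neither atomics nor the locked-int wrappers buy anything there.
Below is a minimal illustrative sketch of the resulting accumulation pattern;
it is not part of the patch that follows, and the struct and function names
are simplified stand-ins rather than the real jemalloc identifiers:

	#include <stddef.h>
	#include <stdint.h>

	/* Derived stats have a single writer (the ctl merge path). */
	typedef struct derived_large_stats_s {
		size_t allocated_large;
		uint64_t nmalloc_large;
	} derived_large_stats_t;

	static void
	derived_large_stats_accum(derived_large_stats_t *dst,
	    const derived_large_stats_t *src) {
		/* Plain additions; no atomic RMW or mutex is needed. */
		dst->allocated_large += src->allocated_large;
		dst->nmalloc_large += src->nmalloc_large;
	}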
--- include/jemalloc/internal/arena_stats.h | 14 +++++----- src/arena.c | 29 ++++++++----------- src/ctl.c | 49 ++++++++++++--------------------- 3 files changed, 37 insertions(+), 55 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3bfc858..3b3441f 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -56,12 +56,12 @@ struct arena_stats_s { atomic_zu_t internal; - atomic_zu_t allocated_large; /* Derived. */ - locked_u64_t nmalloc_large; /* Derived. */ - locked_u64_t ndalloc_large; /* Derived. */ - locked_u64_t nfills_large; /* Derived. */ - locked_u64_t nflushes_large; /* Derived. */ - locked_u64_t nrequests_large; /* Derived. */ + size_t allocated_large; /* Derived. */ + uint64_t nmalloc_large; /* Derived. */ + uint64_t ndalloc_large; /* Derived. */ + uint64_t nfills_large; /* Derived. */ + uint64_t nflushes_large; /* Derived. */ + uint64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. This @@ -71,7 +71,7 @@ struct arena_stats_s { pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ + size_t tcache_bytes; /* Derived. */ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; diff --git a/src/arena.c b/src/arena.c index 0fe85a9..73033a6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -150,42 +150,37 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nmalloc); locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); - locked_inc_u64_unsynchronized(&astats->nmalloc_large, - nmalloc); + astats->nmalloc_large += nmalloc; uint64_t ndalloc = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); - locked_inc_u64_unsynchronized(&astats->ndalloc_large, - ndalloc); + astats->ndalloc_large += ndalloc; uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); locked_inc_u64_unsynchronized(&lstats[i].nrequests, nmalloc + nrequests); - locked_inc_u64_unsynchronized(&astats->nrequests_large, - nmalloc + nrequests); + astats->nrequests_large += nmalloc + nrequests; /* nfill == nmalloc for large currently. */ locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc); - locked_inc_u64_unsynchronized(&astats->nfills_large, - nmalloc); + astats->nfills_large += nmalloc; uint64_t nflush = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nflushes); locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush); - locked_inc_u64_unsynchronized(&astats->nflushes_large, - nflush); + astats->nflushes_large += nflush; assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - atomic_load_add_store_zu(&astats->allocated_large, - curlextents * sz_index2size(SC_NBINS + i)); + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); } for (pszind_t i = 0; i < SC_NPSIZES; i++) { @@ -213,22 +208,22 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); /* tcache_bytes counts currently cached bytes. 
*/ - atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); + astats->tcache_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; - atomic_load_add_store_zu(&astats->tcache_bytes, + astats->tcache_bytes += cache_bin_ncached_get(tbin, - &tcache_bin_info[i]) * sz_index2size(i)); + &tcache_bin_info[i]) * sz_index2size(i); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; - atomic_load_add_store_zu(&astats->tcache_bytes, + astats->tcache_bytes += cache_bin_ncached_get(tbin, &tcache_bin_info[i + SC_NBINS]) - * sz_index2size(i + SC_NBINS)); + * sz_index2size(i + SC_NBINS); } } malloc_mutex_prof_read(tsdn, diff --git a/src/ctl.c b/src/ctl.c index 368eb5f..a3cc74a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -915,26 +915,21 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { - ctl_accum_atomic_zu(&sdstats->astats.allocated_large, - &astats->astats.allocated_large); + sdstats->astats.allocated_large += + astats->astats.allocated_large; } else { - assert(atomic_load_zu(&astats->astats.allocated_large, - ATOMIC_RELAXED) == 0); - } - ctl_accum_locked_u64(&sdstats->astats.nmalloc_large, - &astats->astats.nmalloc_large); - ctl_accum_locked_u64(&sdstats->astats.ndalloc_large, - &astats->astats.ndalloc_large); - ctl_accum_locked_u64(&sdstats->astats.nrequests_large, - &astats->astats.nrequests_large); - ctl_accum_locked_u64(&sdstats->astats.nflushes_large, - &astats->astats.nflushes_large); + assert(astats->astats.allocated_large == 0); + } + sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sdstats->astats.nrequests_large + += astats->astats.nrequests_large; + sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.abandoned_vm, &astats->astats.pa_shard_stats.abandoned_vm); - ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes, - &astats->astats.tcache_bytes); + sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; if (ctl_arena->arena_ind == 0) { sdstats->astats.uptime = astats->astats.uptime; @@ -1082,8 +1077,7 @@ ctl_refresh(tsdn_t *tsdn) { if (config_stats) { ctl_stats->allocated = ctl_sarena->astats->allocated_small + - atomic_load_zu(&ctl_sarena->astats->astats.allocated_large, - ATOMIC_RELAXED); + ctl_sarena->astats->astats.allocated_large; ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, @@ -2947,8 +2941,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_internal, CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, arenas_i(mib[2])->astats->astats.resident, size_t) @@ -2970,27 +2963,21 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - 
atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.allocated_large, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) + arenas_i(mib[2])->astats->astats.nrequests_large, uint64_t) /* * Note: "nmalloc_large" here instead of "nfills" in the read. This is * intentional (large has no batch fill). */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) + arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t) /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ -- cgit v0.12 From 8164fad4045a1e30580da30294652e7c3b8a75f7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 08:46:43 -0700 Subject: Stats: Fix edata_cache size merging. Previously, we assigned to the output rather than incrementing it. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 73033a6..dfb4759 100644 --- a/src/arena.c +++ b/src/arena.c @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->pa_shard_stats.retained += ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; - astats->pa_shard_stats.edata_avail = atomic_load_zu( + astats->pa_shard_stats.edata_avail += atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); /* Dirty decay stats */ -- cgit v0.12 From f29f6090f589bbd1eda92f025e931e449fa9d621 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:20:37 -0700 Subject: PA: Add pa_extra.c and put PA forking there. 
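With the fork handlers split into pa_extra.c, the arena prefork/postfork hooks
simply delegate to the matching pa_shard phase, and the phase numbers stay in
sync with the arena phase numbering (which is why there is no
pa_shard_prefork1). The caller below is a hypothetical sketch for illustration
only -- the real prefork driver lives elsewhere in jemalloc -- but the
phase-to-mutex mapping in the comments comes from the diff that follows:

	/* Hypothetical sketch: acquire locks in increasing phase order pre-fork. */
	static void
	arena_prefork_pa_phases(tsdn_t *tsdn, arena_t *arena) {
		arena_prefork0(tsdn, arena); /* pa_shard_prefork0: decay mutexes */
		arena_prefork1(tsdn, arena); /* arena-only phase; PA has no prefork1 */
		arena_prefork2(tsdn, arena); /* pa_shard_prefork2: ecache_grow */
		arena_prefork3(tsdn, arena); /* pa_shard_prefork3: dirty/muzzy/retained ecaches */
		arena_prefork4(tsdn, arena); /* pa_shard_prefork4: edata_cache */
	}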
--- Makefile.in | 1 + include/jemalloc/internal/pa.h | 18 +++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/arena.c | 27 +++-------- src/pa_extra.c | 55 ++++++++++++++++++++++ 8 files changed, 88 insertions(+), 21 deletions(-) create mode 100644 src/pa_extra.c diff --git a/Makefile.in b/Makefile.in index b19c14f..c0929ce 100644 --- a/Makefile.in +++ b/Makefile.in @@ -127,6 +127,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ + $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f7abf1e..9cf290c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -211,4 +211,22 @@ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); +/******************************************************************************/ +/* + * Various bits of "boring" functionality that are still part of this module, + * but that we relegate to pa_extra.c, to keep the core logic in pa.c as + * readable as possible. + */ + +/* + * These fork phases are synchronized with the arena fork phase numbering to + * make it easy to keep straight. That's why there's no prefork1. + */ +void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 156e459..9f81e21 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 45557f6..15fe7f0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index c5cfb95..b5fccae 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 45557f6..15fe7f0 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index dfb4759..dc8c26b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1739,8 +1739,7 @@ arena_boot(sc_data_t *sc_data) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_prefork0(tsdn, 
&arena->pa_shard); } void @@ -1752,19 +1751,17 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - ecache_grow_prefork(tsdn, &arena->pa_shard.ecache_grow); + pa_shard_prefork2(tsdn, &arena->pa_shard); } void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - ecache_prefork(tsdn, &arena->pa_shard.ecache_dirty); - ecache_prefork(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_prefork(tsdn, &arena->pa_shard.ecache_retained); + pa_shard_prefork3(tsdn, &arena->pa_shard); } void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - edata_cache_prefork(tsdn, &arena->pa_shard.edata_cache); + pa_shard_prefork4(tsdn, &arena->pa_shard); } void @@ -1798,13 +1795,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - edata_cache_postfork_parent(tsdn, &arena->pa_shard.edata_cache); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_postfork_parent(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -1844,13 +1835,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - edata_cache_postfork_child(tsdn, &arena->pa_shard.edata_cache); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_postfork_child(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } diff --git a/src/pa_extra.c b/src/pa_extra.c new file mode 100644 index 0000000..bfb0a00 --- /dev/null +++ b/src/pa_extra.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +/* + * This file is logically part of the PA module. While pa.c contains the core + * allocator functionality, this file contains boring integration functionality; + * things like the pre- and post- fork handlers, and stats merging for CTL + * refreshes. 
+ */ + +void +pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &shard->decay_muzzy.mtx); +} + +void +pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { + ecache_grow_prefork(tsdn, &shard->ecache_grow); +} + +void +pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { + ecache_prefork(tsdn, &shard->ecache_dirty); + ecache_prefork(tsdn, &shard->ecache_muzzy); + ecache_prefork(tsdn, &shard->ecache_retained); +} + + +void +pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_prefork(tsdn, &shard->edata_cache); +} + +void +pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_postfork_parent(tsdn, &shard->edata_cache); + ecache_postfork_parent(tsdn, &shard->ecache_dirty); + ecache_postfork_parent(tsdn, &shard->ecache_muzzy); + ecache_postfork_parent(tsdn, &shard->ecache_retained); + ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); + malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); +} + +void +pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_postfork_child(tsdn, &shard->edata_cache); + ecache_postfork_child(tsdn, &shard->ecache_dirty); + ecache_postfork_child(tsdn, &shard->ecache_muzzy); + ecache_postfork_child(tsdn, &shard->ecache_retained); + ecache_grow_postfork_child(tsdn, &shard->ecache_grow); + malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); +} -- cgit v0.12 From 506d907e40e8b5b191b8bc5f2ee77d87e0684cfb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:28:13 -0700 Subject: PA: Move in basic stats merging. --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 4 +--- src/pa_extra.c | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9cf290c..8c82823 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -229,4 +229,7 @@ void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, + size_t *ndirty, size_t *nmuzzy); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index dc8c26b..10a2468 100644 --- a/src/arena.c +++ b/src/arena.c @@ -71,9 +71,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dss = dss_prec_names[arena_dss_prec_get(arena)]; *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - *nactive += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); - *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); + pa_shard_basic_stats_merge(&arena->pa_shard, nactive, ndirty, nmuzzy); } void diff --git a/src/pa_extra.c b/src/pa_extra.c index bfb0a00..1b642df 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -53,3 +53,11 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); } + +void +pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, + size_t *nmuzzy) { + *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); + *ndirty += 
ecache_npages_get(&shard->ecache_dirty); + *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); +} -- cgit v0.12 From 81c6027592d59383107b3a7a26caddb787ed10c7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:36:25 -0700 Subject: Arena stats: Give it its own "mapped". This distinguishes it from the PA mapped stat, which is now named "pa_mapped" to avoid confusion. The (derived) arena stat includes base memory, and the PA stat is no longer partially derived. --- include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/pa.h | 9 +++++---- src/arena.c | 5 ++--- src/ctl.c | 11 +++-------- src/extent.c | 2 +- src/pa.c | 10 +++++----- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3b3441f..9effa61 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -53,6 +53,7 @@ struct arena_stats_s { size_t base; /* Derived. */ size_t resident; /* Derived. */ size_t metadata_thp; /* Derived. */ + size_t mapped; /* Derived. */ atomic_zu_t internal; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 8c82823..1bffa9e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -70,12 +70,13 @@ struct pa_shard_stats_s { size_t retained; /* Derived. */ /* - * Number of bytes currently mapped, excluding retained memory. + * Number of bytes currently mapped, excluding retained memory (and any + * base-allocated memory, which is tracked by the arena stats). * - * Partially derived -- we maintain our own counter, but add in the - * base's own counter at merge. + * We name this "pa_mapped" to avoid confusion with the arena_stats + * "mapped". */ - atomic_zu_t mapped; + atomic_zu_t pa_mapped; /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. 
*/ diff --git a/src/arena.c b/src/arena.c index 10a2468..07a6051 100644 --- a/src/arena.c +++ b/src/arena.c @@ -88,10 +88,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t mapped = atomic_load_zu(&arena->pa_shard.stats->mapped, + size_t pa_mapped = atomic_load_zu(&arena->pa_shard.stats->pa_mapped, ATOMIC_RELAXED); - atomic_load_add_store_zu(&astats->pa_shard_stats.mapped, - base_mapped + mapped); + astats->mapped += base_mapped + pa_mapped; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index a3cc74a..00fd744 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -855,9 +855,7 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_atomic_zu( - &sdstats->astats.pa_shard_stats.mapped, - &astats->astats.pa_shard_stats.mapped); + sdstats->astats.mapped += astats->astats.mapped; sdstats->astats.pa_shard_stats.retained += astats->astats.pa_shard_stats.retained; sdstats->astats.pa_shard_stats.edata_avail @@ -1085,9 +1083,7 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; - ctl_stats->mapped = atomic_load_zu( - &ctl_sarena->astats->astats.pa_shard_stats.mapped, - ATOMIC_RELAXED); + ctl_stats->mapped = ctl_sarena->astats->astats.mapped; ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats.retained; @@ -2898,8 +2894,7 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, size_t) diff --git a/src/extent.c b/src/extent.c index 62ebff5..05d1755 100644 --- a/src/extent.c +++ b/src/extent.c @@ -967,7 +967,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->mapped, extent_size, + atomic_fetch_sub_zu(&shard->stats->pa_mapped, extent_size, ATOMIC_RELAXED); } } diff --git a/src/pa.c b/src/pa.c index 10a4401..1b7d374 100644 --- a/src/pa.c +++ b/src/pa.c @@ -101,8 +101,8 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, - ATOMIC_RELAXED); + atomic_fetch_add_zu(&shard->stats->pa_mapped, + mapped_add, ATOMIC_RELAXED); } } return edata; @@ -147,7 +147,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } pa_nactive_add(shard, expand_amount >> LG_PAGE); @@ -270,8 +270,8 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, locked_inc_u64(tsdn, 
LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->mapped, nunmapped << LG_PAGE, - ATOMIC_RELAXED); + atomic_fetch_sub_zu(&shard->stats->pa_mapped, + nunmapped << LG_PAGE, ATOMIC_RELAXED); } return npurged; -- cgit v0.12 From 238f3c743067b1305f14ba4ddcf3b95ec7719ae7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 10:28:18 -0700 Subject: PA: Move in full stats merging. --- include/jemalloc/internal/pa.h | 9 ++++++ src/arena.c | 71 +++--------------------------------------- src/pa_extra.c | 66 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1bffa9e..03ab6d0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -233,4 +233,13 @@ void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +static inline size_t +pa_shard_pa_mapped(pa_shard_t *shard) { + return atomic_load_zu(&shard->stats->pa_mapped, ATOMIC_RELAXED); +} + +void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, + pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + size_t *resident); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 07a6051..fd2876e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -88,60 +88,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t pa_mapped = atomic_load_zu(&arena->pa_shard.stats->pa_mapped, - ATOMIC_RELAXED); + size_t pa_mapped = pa_shard_pa_mapped(&arena->pa_shard); astats->mapped += base_mapped + pa_mapped; + astats->resident += base_resident; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - astats->pa_shard_stats.retained += - ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; - astats->pa_shard_stats.edata_avail += atomic_load_zu( - &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); - - /* Dirty decay stats */ - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.npurge, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.npurge)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.nmadvise, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.purged, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.purged)); - - /* Decay stats */ - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.npurge, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.npurge)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.nmadvise, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.nmadvise)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.purged, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.purged)); - astats->base += base_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); astats->metadata_thp += metadata_thp; - 
size_t pa_resident_pgs = 0; - pa_resident_pgs - += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); - pa_resident_pgs - += ecache_npages_get(&arena->pa_shard.ecache_dirty); - astats->resident += base_resident + (pa_resident_pgs << LG_PAGE); - - atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, - atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, - ATOMIC_RELAXED)); - for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), @@ -180,27 +136,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, curlextents * sz_index2size(SC_NBINS + i); } - for (pszind_t i = 0; i < SC_NPSIZES; i++) { - size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, - retained_bytes; - dirty = ecache_nextents_get(&arena->pa_shard.ecache_dirty, i); - muzzy = ecache_nextents_get(&arena->pa_shard.ecache_muzzy, i); - retained = ecache_nextents_get(&arena->pa_shard.ecache_retained, - i); - dirty_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_dirty, - i); - muzzy_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_muzzy, - i); - retained_bytes = ecache_nbytes_get( - &arena->pa_shard.ecache_retained, i); - - estats[i].ndirty = dirty; - estats[i].nmuzzy = muzzy; - estats[i].nretained = retained; - estats[i].dirty_bytes = dirty_bytes; - estats[i].muzzy_bytes = muzzy_bytes; - estats[i].retained_bytes = retained_bytes; - } + pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, + estats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/pa_extra.c b/src/pa_extra.c index 1b642df..1088596 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -61,3 +61,69 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, *ndirty += ecache_npages_get(&shard->ecache_dirty); *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); } + +void +pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, + pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + size_t *resident) { + cassert(config_stats); + + shard_stats_out->retained += + ecache_npages_get(&shard->ecache_retained) << LG_PAGE; + shard_stats_out->edata_avail += atomic_load_zu( + &shard->edata_cache.count, ATOMIC_RELAXED); + + size_t resident_pgs = 0; + resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); + resident_pgs += ecache_npages_get(&shard->ecache_dirty); + *resident += (resident_pgs << LG_PAGE); + + /* Dirty decay stats */ + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.npurge)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.nmadvise)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.purged)); + + /* Muzzy decay stats */ + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.npurge)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.nmadvise)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.purged)); + + 
atomic_load_add_store_zu(&shard_stats_out->abandoned_vm, + atomic_load_zu(&shard->stats->abandoned_vm, ATOMIC_RELAXED)); + + for (pszind_t i = 0; i < SC_NPSIZES; i++) { + size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, + retained_bytes; + dirty = ecache_nextents_get(&shard->ecache_dirty, i); + muzzy = ecache_nextents_get(&shard->ecache_muzzy, i); + retained = ecache_nextents_get(&shard->ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&shard->ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&shard->ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&shard->ecache_retained, i); + + extent_stats_out[i].ndirty = dirty; + extent_stats_out[i].nmuzzy = muzzy; + extent_stats_out[i].nretained = retained; + extent_stats_out[i].dirty_bytes = dirty_bytes; + extent_stats_out[i].muzzy_bytes = muzzy_bytes; + extent_stats_out[i].retained_bytes = retained_bytes; + } +} -- cgit v0.12 From 07675840a5d41c2537de2bd16e8da1cd11ef48e9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 11:21:22 -0700 Subject: PA: Move in some more internals accesses. --- include/jemalloc/internal/pa.h | 18 ++++++++++++++++++ src/arena.c | 34 +++++----------------------------- src/pa.c | 26 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 03ab6d0..4156a4e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -154,6 +154,12 @@ pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { } static inline bool +pa_shard_dont_decay_muzzy(pa_shard_t *shard) { + return ecache_npages_get(&shard->ecache_muzzy) == 0 && + pa_shard_muzzy_decay_ms_get(shard) <= 0; +} + +static inline bool pa_shard_may_force_decay(pa_shard_t *shard) { return !(pa_shard_dirty_decay_ms_get(shard) == -1 || pa_shard_muzzy_decay_ms_get(shard) == -1); @@ -167,6 +173,18 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); +/* + * This does the PA-specific parts of arena reset (i.e. freeing all active + * allocations). + */ +void pa_shard_reset(pa_shard_t *shard); +/* + * Destroy all the remaining retained extents. Should only be called after + * decaying all active, dirty, and muzzy extents to the retained state, as the + * last step in destroying the shard. + */ +void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); + size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. */ diff --git a/src/arena.c b/src/arena.c index fd2876e..c9a4626 100644 --- a/src/arena.c +++ b/src/arena.c @@ -544,8 +544,7 @@ arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - if (ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0 && - arena_muzzy_decay_ms_get(arena) <= 0) { + if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, @@ -703,27 +702,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { &arena->bins[i].bin_shards[j]); } } - - atomic_store_zu(&arena->pa_shard.nactive, 0, ATOMIC_RELAXED); -} - -static void -arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { - /* - * Iterate over the retained extents and destroy them. 
This gives the - * extent allocator underlying the extent hooks an opportunity to unmap - * all retained memory without having to keep its own metadata - * structures. In practice, virtual memory for dss-allocated extents is - * leaked here, so best practice is to avoid dss for arenas to be - * destroyed, or provide custom extent hooks that track retained - * dss-based extents for later reuse. - */ - ehooks_t *ehooks = arena_get_ehooks(arena); - edata_t *edata; - while ((edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, &arena->pa_shard, ehooks, edata); - } + pa_shard_reset(&arena->pa_shard); } void @@ -735,13 +714,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * No allocations have occurred since arena_reset() was called. * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached - * extents, so only retained extents may remain. + * extents, so only retained extents may remain and it's safe to call + * pa_shard_destroy_retained. */ - assert(ecache_npages_get(&arena->pa_shard.ecache_dirty) == 0); - assert(ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0); - - /* Deallocate retained memory. */ - arena_destroy_retained(tsd_tsdn(tsd), arena); + pa_shard_destroy_retained(tsd_tsdn(tsd), &arena->pa_shard); /* * Remove the arena pointer from the arenas array. We rely on the fact diff --git a/src/pa.c b/src/pa.c index 1b7d374..a8aee21 100644 --- a/src/pa.c +++ b/src/pa.c @@ -65,6 +65,32 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return false; } +void +pa_shard_reset(pa_shard_t *shard) { + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); +} + +void +pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { + assert(ecache_npages_get(&shard->ecache_dirty) == 0); + assert(ecache_npages_get(&shard->ecache_muzzy) == 0); + /* + * Iterate over the retained extents and destroy them. This gives the + * extent allocator underlying the extent hooks an opportunity to unmap + * all retained memory without having to keep its own metadata + * structures. In practice, virtual memory for dss-allocated extents is + * leaked here, so best practice is to avoid dss for arenas to be + * destroyed, or provide custom extent hooks that track retained + * dss-based extents for later reuse. + */ + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + edata_t *edata; + while ((edata = ecache_evict(tsdn, shard, ehooks, + &shard->ecache_retained, 0)) != NULL) { + extent_destroy_wrapper(tsdn, shard, ehooks, edata); + } +} + size_t pa_shard_extent_sn_next(pa_shard_t *shard) { return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); -- cgit v0.12 From daefde88fe960e2ff0756fac82f82512025bdf1d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 15:26:50 -0700 Subject: PA: Move in mutex stats reading. --- include/jemalloc/internal/pa.h | 9 +++++++++ src/arena.c | 16 +++------------- src/pa_extra.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4156a4e..0584154 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -260,4 +260,13 @@ void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, size_t *resident); +/* + * Reads the PA-owned mutex stats into the output stats array, at the + * appropriate positions. 
Morally, these stats should really live in + * pa_shard_stats_t, but the indices are sort of baked into the various mutex + * prof macros. This would be a good thing to do at some point. + */ +void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index c9a4626..e96934a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -173,21 +173,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(pa_shard.edata_cache.mtx, - arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, - arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_muzzy.mtx, - arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, - arena_prof_mutex_extents_retained) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_dirty.mtx, - arena_prof_mutex_decay_dirty) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_muzzy.mtx, - arena_prof_mutex_decay_muzzy) READ_ARENA_MUTEX_PROF_DATA(base->mtx, - arena_prof_mutex_base) + arena_prof_mutex_base); #undef READ_ARENA_MUTEX_PROF_DATA + pa_shard_mtx_stats_read(tsdn, &arena->pa_shard, + astats->mutex_prof_data); nstime_copy(&astats->uptime, &arena->create_time); nstime_update(&astats->uptime); diff --git a/src/pa_extra.c b/src/pa_extra.c index 1088596..1f90f7f 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -127,3 +127,28 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, extent_stats_out[i].retained_bytes = retained_bytes; } } + +static void +pa_shard_mtx_stats_read_single(tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data, + malloc_mutex_t *mtx, int ind) { + malloc_mutex_lock(tsdn, mtx); + malloc_mutex_prof_read(tsdn, &mutex_prof_data[ind], mtx); + malloc_mutex_unlock(tsdn, mtx); +} + +void +pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]) { + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->edata_cache.mtx, arena_prof_mutex_extent_avail); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_dirty.mtx, arena_prof_mutex_extents_dirty); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_retained.mtx, arena_prof_mutex_extents_retained); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); +} -- cgit v0.12 From 45671e4a27740c85c83b248d0e7e3f45024fdc45 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 15:51:13 -0700 Subject: PA: Move in retain growth limit setting. 
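The relocated limit setter keeps the arena-level contract: new_limit may be NULL for a pure query, old_limit may be NULL if the caller does not care about the previous value, and a true return means the requested limit was invalid. A minimal usage sketch (illustrative only, not part of the patch; it assumes a tsdn_t *tsdn and an initialized arena are in scope):

    size_t old_limit;
    /* Query the current grow-retained limit without changing it. */
    if (pa_shard_retain_grow_limit_get_set(tsdn, &arena->pa_shard,
        &old_limit, /* new_limit */ NULL)) {
        /* Not reachable for a pure query; true means an invalid new limit. */
    }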
--- include/jemalloc/internal/pa.h | 12 ++++++++++++ src/arena.c | 22 ++-------------------- src/pa.c | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0584154..ec6c804 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -230,6 +230,18 @@ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); +/* + * Gets / sets the maximum amount that we'll grow an arena down the + * grow-retained pathways (unless forced to by an allocaction request). + * + * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't + * care about the previous value. + * + * Returns true on error (if the new limit is not valid). + */ +bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, + size_t *old_limit, size_t *new_limit); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/src/arena.c b/src/arena.c index e96934a..178cc9a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1411,26 +1411,8 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - - pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); - if (new_limit != NULL) { - size_t limit = *new_limit; - /* Grow no more than the new limit. */ - if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { - return true; - } - } - - malloc_mutex_lock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); - if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->pa_shard.ecache_grow.limit); - } - if (new_limit != NULL) { - arena->pa_shard.ecache_grow.limit = new_ind; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); - - return false; + return pa_shard_retain_grow_limit_get_set(tsd_tsdn(tsd), + &arena->pa_shard, old_limit, new_limit); } unsigned diff --git a/src/pa.c b/src/pa.c index a8aee21..d4949f5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -395,3 +395,27 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } + +bool +pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, + size_t *old_limit, size_t *new_limit) { + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { + return true; + } + } + + malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(shard->ecache_grow.limit); + } + if (new_limit != NULL) { + shard->ecache_grow.limit = new_ind; + } + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + + return false; +} -- cgit v0.12 From faec7219b23303ec812e9aee6fc35352f936d10b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 16:06:40 -0700 Subject: PA: Move in decay initialization. --- include/jemalloc/internal/pa.h | 3 ++- src/arena.c | 14 +++----------- src/pa.c | 10 +++++++++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ec6c804..82676ee 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -172,7 +172,8 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. 
*/ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). diff --git a/src/arena.c b/src/arena.c index 178cc9a..fd19e77 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1487,19 +1487,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, - &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx))) { - goto label_error; - } - nstime_t cur_time; nstime_init_update(&cur_time); - - if (decay_init(&arena->pa_shard.decay_dirty, &cur_time, - arena_dirty_decay_ms_default_get())) { - goto label_error; - } - if (decay_init(&arena->pa_shard.decay_muzzy, &cur_time, + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), + &cur_time, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/pa.c b/src/pa.c index d4949f5..a1063b9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -14,7 +14,8 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -53,6 +54,13 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { + return true; + } + if (decay_init(&shard->decay_muzzy, cur_time, muzzy_decay_ms)) { + return true; + } + atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); -- cgit v0.12 From bd4fdf295ed5a56f433fa8d4a23d1273cc7ad156 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 11:47:51 -0700 Subject: Rtree: Pull leaf contents into their own struct. --- include/jemalloc/internal/edata.h | 17 ++++++ include/jemalloc/internal/rtree.h | 118 ++++++++++++++++++++++++-------------- src/emap.c | 18 ++++-- test/unit/rtree.c | 7 ++- 4 files changed, 110 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2a81bdc..0a99ff0 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -39,6 +39,23 @@ struct e_prof_info_s { }; typedef struct e_prof_info_s e_prof_info_t; +/* + * The information about a particular edata that lives in an emap. Space is + * more previous there (the information, plus the edata pointer, has to live in + * a 64-bit word if we want to enable a packed representation. + * + * There are two things that are special about the information here: + * - It's quicker to access. You have one fewer pointer hop, since finding the + * edata_t associated with an item always requires accessing the rtree leaf in + * which this data is stored. + * - It can be read unsynchronized, and without worrying about lifetime issues. + */ +typedef struct edata_map_info_s edata_map_info_t; +struct edata_map_info_s { + bool slab; + szind_t szind; +}; + /* Extent (span of pages). 
Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; typedef ql_head(edata_t) edata_list_t; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 094cc1a..1c2715d 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -43,6 +43,13 @@ struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; +typedef struct rtree_leaf_elm_contents_s rtree_leaf_elm_contents_t; +struct rtree_leaf_elm_contents_s { + edata_t *edata; + szind_t szind; + bool slab; +}; + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -163,43 +170,53 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_leaf_elm_bits_edata_get(uintptr_t bits) { +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_encode(rtree_leaf_elm_contents_t contents) { + uintptr_t edata_bits = (uintptr_t)contents.edata + & (((uintptr_t)1 << LG_VADDR) - 1); + uintptr_t szind_bits = (uintptr_t)contents.szind << LG_VADDR; + /* + * Slab shares the low bit of edata; we know edata is on an even address + * (in fact, it's 128 bytes on 64-bit systems; we can enforce this + * alignment if we want to steal 6 extra rtree leaf bits someday. + */ + uintptr_t slab_bits = (uintptr_t)contents.slab; + return szind_bits | edata_bits | slab_bits; +} + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_contents_t +rtree_leaf_elm_bits_decode(uintptr_t bits) { + rtree_leaf_elm_contents_t contents; + /* Do the easy things first. */ + contents.szind = bits >> LG_VADDR; + contents.slab = (bool)(bits & 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set - * the higher ones. Instead, the high bits gets zeroed. + * the higher ones. Instead, the high bits get zeroed. */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off the slab bit. */ uintptr_t low_bit_mask = ~(uintptr_t)1; uintptr_t mask = high_bit_mask & low_bit_mask; - return (edata_t *)(bits & mask); + contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask slab bit. */ - return (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> - RTREE_NHIB) & ~((uintptr_t)0x1)); + contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) + >> RTREE_NHIB) & ~((uintptr_t)0x1)); # endif + return contents; } -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_bits_szind_get(uintptr_t bits) { - return (szind_t)(bits >> LG_VADDR); -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_bits_slab_get(uintptr_t bits) { - return (bool)(bits & (uintptr_t)0x1); -} - -# endif +# endif /* RTREE_LEAF_COMPACT */ JEMALLOC_ALWAYS_INLINE edata_t * rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_edata_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.edata; #else edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -212,7 +229,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_szind_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.szind; #else return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -224,7 +242,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.slab; #else return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -236,9 +255,10 @@ rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, edata_t *edata) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) - | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.edata = edata; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); @@ -253,10 +273,10 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.szind = szind; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); @@ -269,9 +289,10 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.slab = slab; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); @@ -280,11 +301,9 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_t *elm, rtree_leaf_elm_contents_t contents) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab); + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else rtree_leaf_elm_slab_write(tsdn, 
rtree, elm, slab); @@ -382,7 +401,11 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, } assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, edata, szind, slab); + rtree_leaf_elm_contents_t contents; + contents.edata = edata; + contents.szind = szind; + contents.slab = slab; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; } @@ -437,9 +460,11 @@ rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, } #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - *r_edata = rtree_leaf_elm_bits_edata_get(bits); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + + *r_edata = contents.edata; + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); @@ -473,8 +498,10 @@ rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + bits); + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); @@ -494,8 +521,9 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); @@ -518,7 +546,11 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); + rtree_leaf_elm_contents_t contents; + contents.edata = NULL; + contents.szind = SC_NSIZES; + contents.slab = false; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); } #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/src/emap.c b/src/emap.c index 723dfad..98921df 100644 --- a/src/emap.c +++ b/src/emap.c @@ -139,10 +139,13 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); + rtree_leaf_elm_contents_t contents; + contents.edata = edata; + contents.szind = szind; + contents.slab = slab; + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, edata, szind, - slab); + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); } } @@ -292,15 +295,20 @@ emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void 
emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { + rtree_leaf_elm_contents_t clear_contents; + clear_contents.edata = NULL; + clear_contents.szind = SC_NSIZES; + clear_contents.slab = false; + if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->lead_elm_b, NULL, SC_NSIZES, false); + prepare->lead_elm_b, clear_contents); } rtree_leaf_elm_t *merged_b; if (prepare->trail_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->trail_elm_a, NULL, SC_NSIZES, false); + prepare->trail_elm_a, clear_contents); merged_b = prepare->trail_elm_b; } else { merged_b = prepare->trail_elm_a; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 3545cfc..1a842d7 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -137,8 +137,11 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, - false); + rtree_leaf_elm_contents_t contents; + contents.edata = &edata; + contents.szind = SC_NSIZES; + contents.slab = false; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value"); -- cgit v0.12 From 12eb888e54572c417c68495fa5be75d9f8402f81 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 12:38:28 -0700 Subject: Edata: Add a ranged bit. We steal the dumpable bit, which we ended up not needing. --- include/jemalloc/internal/edata.h | 51 +++++++++++++++------------------------ src/emap.c | 6 ++--- src/extent.c | 14 ++++++----- src/extent_dss.c | 4 +-- test/unit/rtree.c | 8 +++--- test/unit/slab.c | 2 +- 6 files changed, 38 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 0a99ff0..3a9ebc8 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -68,7 +68,7 @@ struct edata_s { * a: arena_ind * b: slab * c: committed - * d: dumpable + * r: ranged * z: zeroed * t: state * i: szind @@ -76,7 +76,7 @@ struct edata_s { * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zrcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -91,22 +91,10 @@ struct edata_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * - * dumpable: The dumpable flag indicates whether or not we've set the - * memory in question to be dumpable. Note that this - * interacts somewhat subtly with user-specified extent hooks, - * since we don't know if *they* are fiddling with - * dumpability (in which case, we don't want to undo whatever - * they're doing). To deal with this scenario, we: - * - Make dumpable false only for memory allocated with the - * default hooks. - * - Only allow memory to go from non-dumpable to dumpable, - * and only once. - * - Never make the OS call to allow dumping when the - * dumpable bit is already set. - * These three constraints mean that we will never - * accidentally dump user memory that the user meant to set - * nondumpable with their extent hooks. - * + * ranged: Whether or not this extent is currently owned by the range + * allocator. 
This may be false even if the extent originally + * came from a range allocator; this indicates its *current* + * owner, not its original owner. * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. @@ -148,12 +136,12 @@ struct edata_s { #define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) #define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_DUMPABLE_WIDTH 1 -#define EDATA_BITS_DUMPABLE_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_DUMPABLE_MASK MASK(EDATA_BITS_DUMPABLE_WIDTH, EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_RANGED_WIDTH 1 +#define EDATA_BITS_RANGED_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_RANGED_MASK MASK(EDATA_BITS_RANGED_WIDTH, EDATA_BITS_RANGED_SHIFT) #define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_DUMPABLE_WIDTH + EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_RANGED_WIDTH + EDATA_BITS_RANGED_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_WIDTH 2 @@ -283,9 +271,9 @@ edata_committed_get(const edata_t *edata) { } static inline bool -edata_dumpable_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_DUMPABLE_MASK) >> - EDATA_BITS_DUMPABLE_SHIFT); +edata_ranged_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_RANGED_MASK) >> + EDATA_BITS_RANGED_SHIFT); } static inline bool @@ -479,9 +467,9 @@ edata_committed_set(edata_t *edata, bool committed) { } static inline void -edata_dumpable_set(edata_t *edata, bool dumpable) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EDATA_BITS_DUMPABLE_SHIFT); +edata_ranged_set(edata_t *edata, bool ranged) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_RANGED_MASK) | + ((uint64_t)ranged << EDATA_BITS_RANGED_SHIFT); } static inline void @@ -522,8 +510,9 @@ edata_is_head_set(edata_t *edata, bool is_head) { static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable, extent_head_state_t is_head) { + bool committed, bool ranged, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); + assert(ranged == false); edata_arena_ind_set(edata, arena_ind); edata_addr_set(edata, addr); @@ -534,7 +523,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_state_set(edata, state); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); - edata_dumpable_set(edata, dumpable); + edata_ranged_set(edata, ranged); ql_elm_new(edata, ql_link); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { @@ -553,7 +542,7 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_dumpable_set(edata, true); + edata_ranged_set(edata, false); } static inline void diff --git a/src/emap.c b/src/emap.c index 98921df..a227ad1 100644 --- a/src/emap.c +++ b/src/emap.c @@ -246,11 +246,11 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), 
edata_committed_get(edata), - edata_dumpable_get(edata), EXTENT_NOT_HEAD); + edata_ranged_get(edata), EXTENT_NOT_HEAD); /* - * We use incorrect constants for things like arena ind, zero, dump, and - * commit state, and head status. This is a fake edata_t, used to + * We use incorrect constants for things like arena ind, zero, ranged, + * and commit state, and head status. This is a fake edata_t, used to * facilitate a lookup. */ edata_t lead; diff --git a/src/extent.c b/src/extent.c index 05d1755..7f2d883 100644 --- a/src/extent.c +++ b/src/extent.c @@ -80,7 +80,7 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, szind, zero, &commit, false); - assert(edata == NULL || edata_dumpable_get(edata)); + assert(edata == NULL || !edata_ranged_get(edata)); return edata; } @@ -110,7 +110,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size, alignment, slab, szind, zero, &commit); } - assert(edata == NULL || edata_dumpable_get(edata)); + assert(edata == NULL || !edata_ranged_get(edata)); return edata; } @@ -119,7 +119,7 @@ ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(edata_dumpable_get(edata)); + assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -661,7 +661,8 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), - extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); + extent_state_active, zeroed, committed, /* ranged */ false, + EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); @@ -814,7 +815,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, size, slab, szind, pa_shard_extent_sn_next(shard), - extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); + extent_state_active, *zero, *commit, /* ranged */ false, + EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; @@ -1059,7 +1061,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { - assert(edata_dumpable_get(edata)); + assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/extent_dss.c b/src/extent_dss.c index de6852e..18b6895 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -157,7 +157,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_addr_page, gap_size_page, false, SC_NSIZES, pa_shard_extent_sn_next( &arena->pa_shard), extent_state_active, - false, true, true, EXTENT_NOT_HEAD); + false, true, false, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -206,7 +206,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - true, EXTENT_NOT_HEAD); + false, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) 
{ memset(ret, 0, size); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 1a842d7..01e710c 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -35,9 +35,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t edata_a, edata_b; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -80,7 +80,7 @@ TEST_BEGIN(test_rtree_bits) { edata_t edata; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_random) { edata_t edata; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 304a93d..5ca8c44 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_arena_slab_regind) { edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, true, + binind, 0, extent_state_active, false, true, false, EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); -- cgit v0.12 From dfef0df71a956338c3bb4a902a288ee550409c3b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 18:34:46 -0700 Subject: Emap: Move edata modification out of emap_remap. 
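With this change emap_remap only updates the boundary mappings; it no longer writes the size class into the edata itself, so callers do that first. A sketch of the resulting call sequence, mirroring the arena_prof_promote hunk below (assumes tsdn, edata, and szind are in scope):

    /* The caller updates the extent's metadata first ... */
    edata_szind_set(edata, szind);
    /* ... and then asks the emap to refresh its boundary mappings. */
    emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false);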
--- src/arena.c | 6 ++++-- src/emap.c | 1 - src/extent.c | 1 + src/pa.c | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index fd19e77..c70b128 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1067,7 +1067,8 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); - emap_remap(tsdn, &emap_global, edata, szind, false); + edata_szind_set(edata, szind); + emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); prof_idump_rollback(tsdn, usize); @@ -1079,7 +1080,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - emap_remap(tsdn, &emap_global, edata, SC_NBINS, false); + edata_szind_set(edata, SC_NBINS); + emap_remap(tsdn, &emap_global, edata, SC_NBINS, /* slab */ false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c index a227ad1..11e4f4a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -206,7 +206,6 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; - edata_szind_set(edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, slab); diff --git a/src/extent.c b/src/extent.c index 7f2d883..0d87cff 100644 --- a/src/extent.c +++ b/src/extent.c @@ -487,6 +487,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { + edata_szind_set(*edata, szind); emap_remap(tsdn, &emap_global, *edata, szind, slab); } diff --git a/src/pa.c b/src/pa.c index a1063b9..2809630 100644 --- a/src/pa.c +++ b/src/pa.c @@ -185,6 +185,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, ATOMIC_RELAXED); } pa_nactive_add(shard, expand_amount >> LG_PAGE); + edata_szind_set(edata, szind); emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } -- cgit v0.12 From 0c96a2f03bcb741b1c29fd1a3af3044a03a8ac08 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 18:45:54 -0700 Subject: Emap: Move out remaining edata modifications. --- src/emap.c | 10 ---------- src/extent.c | 9 +++++++++ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/emap.c b/src/emap.c index 11e4f4a..95ff7b3 100644 --- a/src/emap.c +++ b/src/emap.c @@ -272,9 +272,6 @@ void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { - edata_size_set(lead, size_a); - edata_szind_set(lead, szind_a); - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, prepare->lead_elm_b, lead, szind_a, slab_a); emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, @@ -313,13 +310,6 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, merged_b = prepare->trail_elm_a; } - edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); - edata_szind_set(lead, SC_NSIZES); - edata_sn_set(lead, (edata_sn_get(lead) < edata_sn_get(trail)) ? 
- edata_sn_get(lead) : edata_sn_get(trail)); - edata_zeroed_set(lead, edata_zeroed_get(lead) - && edata_zeroed_get(trail)); - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } diff --git a/src/extent.c b/src/extent.c index 0d87cff..b0db91d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1229,6 +1229,8 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_c; } + edata_size_set(edata, size_a); + edata_szind_set(edata, szind_a); emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); @@ -1278,6 +1280,13 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); + + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + edata_szind_set(a, SC_NSIZES); + edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? + edata_sn_get(a) : edata_sn_get(b)); + edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); + emap_merge_commit(tsdn, &emap_global, &prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); -- cgit v0.12 From 883ab327cca593de320f781e3c654e8b716a4786 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 19:33:30 -0700 Subject: Emap: Move out last edata state touching. --- include/jemalloc/internal/emap.h | 15 ++++++--------- src/emap.c | 11 ----------- src/extent.c | 5 +++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b9f6bc0..8c7713c 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -87,12 +87,10 @@ struct emap_prepare_s { }; /** - * These functions do some of the metadata management for merging, splitting, - * and reusing extents. In particular, they set the boundary mappings from - * addresses to edatas and fill in the szind, size, and slab values for the - * output edata (and, for splitting, *all* values for the trail). If the result - * is going to be used as a slab, you still need to call emap_register_interior - * on it, though. + * These functions the emap metadata management for merging, splitting, and + * reusing extents. In particular, they set the boundary mappings from + * addresses to edatas. If the result is going to be used as a slab, you + * still need to call emap_register_interior on it, though. * * Remap simply changes the szind and slab status of an extent's boundary * mappings. If the extent is not a slab, it doesn't bother with updating the @@ -107,9 +105,8 @@ struct emap_prepare_s { * and it returns true on failure (at which point the caller shouldn't commit). * * In all cases, "lead" refers to the lower-addressed extent, and trail to the - * higher-addressed one. Trail can contain garbage (except for its arena_ind - * and esn values) data for the split variants, and can be reused for any - * purpose by its given arena after a merge or a failed split. + * higher-addressed one. It's the caller's responsibility to set the edata + * state appropriately. 
*/ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); diff --git a/src/emap.c b/src/emap.c index 95ff7b3..c79dafa 100644 --- a/src/emap.c +++ b/src/emap.c @@ -228,7 +228,6 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, szind, slab); } } - } bool @@ -236,16 +235,6 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { EMAP_DECLARE_RTREE_CTX; - /* - * Note that while the trail mostly inherits its attributes from the - * extent to be split, it maintains its own arena ind -- this allows - * cross-arena edata interactions, such as occur in the range ecache. - */ - edata_init(trail, edata_arena_ind_get(trail), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_ranged_get(edata), EXTENT_NOT_HEAD); /* * We use incorrect constants for things like arena ind, zero, ranged, diff --git a/src/extent.c b/src/extent.c index b0db91d..3317993 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1213,6 +1213,11 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } + edata_init(trail, edata_arena_ind_get(edata), + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); -- cgit v0.12 From 7bb6e2dc0d526bac72d2ed531ddb60fd10a5a5e4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 09:46:09 -0700 Subject: Eset: take opt_lg_max_active_fit as a parameter. This breaks its dependence on the global. --- include/jemalloc/internal/eset.h | 2 +- src/eset.c | 29 ++++++++++++++++++----------- src/extent.c | 14 ++++++++++++-- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 5c1051a..d051b81 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -52,6 +52,6 @@ void eset_remove(eset_t *eset, edata_t *edata); * null if no such item could be found. */ edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, - bool delay_coalesce); + unsigned lg_max_fit); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 16ca72d..12a57af 100644 --- a/src/eset.c +++ b/src/eset.c @@ -154,9 +154,15 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, /* * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. + * + * lg_max_fit is the (log of the) maximum ratio between the requested size and + * the returned size that we'll allow. This can reduce fragmentation by + * avoiding reusing and splitting large extents for smaller sizes. In practice, + * it's set to opt_lg_extent_max_active_fit for the dirty eset and SC_PTR_BITS + * for others. 
*/ static edata_t * -eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { +eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -178,14 +184,15 @@ eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); assert(edata_size_get(edata) >= size); - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large eset for much smaller sizes. - * - * Only do check for dirty eset (delay_coalesce). - */ - if (delay_coalesce && - (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + if (lg_max_fit == SC_PTR_BITS) { + /* + * We'll shift by this below, and shifting out all the + * bits is undefined. Decreasing is safe, since the + * page size is larger than 1 byte. + */ + lg_max_fit = SC_PTR_BITS - 1; + } + if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } if (ret == NULL || edata_snad_comp(edata, ret) < 0) { @@ -201,14 +208,14 @@ eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment, bool delay_coalesce) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, unsigned lg_max_fit) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size, delay_coalesce); + edata_t *edata = eset_first_fit(eset, max_size, lg_max_fit); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent.c b/src/extent.c index 3317993..e570ed5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -398,8 +398,18 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, size, alignment, - ecache->delay_coalesce); + /* + * A large extent might be broken up from its original size to + * some small size to satisfy a small request. When that small + * request is freed, though, it won't merge back with the larger + * extent if delayed coalescing is on. The large extent can + * then no longer satify a request for its original size. To + * limit this effect, when delayed coalescing is enabled, we + * put a cap on how big an extent we can split for a request. + */ + unsigned lg_max_fit = ecache->delay_coalesce + ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; + edata = eset_fit(&ecache->eset, size, alignment, lg_max_fit); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); -- cgit v0.12 From f730577277ace08287bb8eedce75e49d35aeb0ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 10:05:12 -0700 Subject: Eset: Parameterize last globals accesses. I.e. opt_retain and maps_coalesce. --- include/jemalloc/internal/eset.h | 2 +- src/eset.c | 16 ++++++---------- src/extent.c | 8 +++++++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index d051b81..e29179d 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -51,7 +51,7 @@ void eset_remove(eset_t *eset, edata_t *edata); * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. 
*/ -edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only, unsigned lg_max_fit); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 12a57af..c4e39d2 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,8 +2,6 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" -/* For opt_retain */ -#include "jemalloc/internal/extent_mmap.h" const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); @@ -162,16 +160,13 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * for others. */ static edata_t * -eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { +eset_first_fit(eset_t *eset, size_t size, bool exact_only, + unsigned lg_max_fit) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - if (!maps_coalesce && !opt_retain) { - /* - * No split / merge allowed (Windows w/o retain). Try exact fit - * only. - */ + if (exact_only) { return edata_heap_empty(&eset->heaps[pind]) ? NULL : edata_heap_first(&eset->heaps[pind]); } @@ -208,14 +203,15 @@ eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment, unsigned lg_max_fit) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only, + unsigned lg_max_fit) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size, lg_max_fit); + edata_t *edata = eset_first_fit(eset, max_size, exact_only, lg_max_fit); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent.c b/src/extent.c index e570ed5..db658bb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -399,6 +399,11 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } } else { /* + * If split and merge are not allowed (Windows w/o retain), try + * exact fit only. + */ + bool exact_only = (!maps_coalesce && !opt_retain); + /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. When that small * request is freed, though, it won't merge back with the larger @@ -409,7 +414,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(&ecache->eset, size, alignment, lg_max_fit); + edata = eset_fit(&ecache->eset, size, alignment, exact_only, + lg_max_fit); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); -- cgit v0.12 From 294b276fc7b03319bbc829cef5de7dfec71f997c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 10:49:34 -0700 Subject: PA: Parameterize emap. Move emap_global to arena. This lets us test the PA module without interfering with the global emap used by the real allocator (the one not under test). 
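At the call sites the change is mostly mechanical: the emap is named (or passed) explicitly instead of being reached through the old anonymous global, which is what lets a test hand the PA module a private emap_t. A representative before/after, based on the arena_inlines_b.h hunks below (illustrative; assumes tsdn and ptr are in scope):

    /* Previously: every lookup went through the shared emap_global, e.g.  */
    /*     edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr);    */
    /* Now the arena-owned emap is named explicitly at each call site.     */
    edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);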
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/arena_inlines_b.h | 52 ++++--- include/jemalloc/internal/emap.h | 2 - include/jemalloc/internal/extent.h | 8 +- include/jemalloc/internal/pa.h | 11 +- src/arena.c | 20 +-- src/ctl.c | 2 +- src/ehooks.c | 8 +- src/emap.c | 2 - src/extent.c | 223 ++++++++++++++-------------- src/inspect.c | 4 +- src/jemalloc.c | 25 ++-- src/large.c | 2 +- src/pa.c | 17 ++- src/prof.c | 3 +- src/tcache.c | 5 +- test/unit/arena_reset.c | 4 +- test/unit/binshard.c | 4 +- test/unit/prof_recent.c | 2 +- 19 files changed, 211 insertions(+), 184 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2463495..9fea729 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -15,6 +15,7 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; +extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 565e226..7351db9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -48,10 +48,12 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); } if (unlikely(!is_slab)) { @@ -75,15 +77,15 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), + &arena_emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, ptr); + &arena_emap_global, ptr); large_prof_tctx_reset(edata); } } @@ -94,7 +96,8 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -157,7 +160,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); unsigned arena_ind = edata_arena_ind_get(edata); return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } @@ -166,7 +169,7 @@ JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); return sz_index2size(alloc_ctx.szind); @@ -184,8 +187,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, - &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, + ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -208,7 +211,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); large_dalloc(tsdn, edata); } } @@ -218,10 +222,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -246,7 +251,8 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); large_dalloc(tsdn, edata); } } @@ -267,11 +273,13 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx = *caller_alloc_ctx; } else { util_assume(!tsdn_null(tsdn)); - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, + &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); 
assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -303,15 +311,16 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, - ptr); + edata_t *edata = emap_edata_lookup(tsdn, + &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -341,7 +350,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); } else { @@ -357,7 +366,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8c7713c..9f814ce 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -26,8 +26,6 @@ struct emap_full_alloc_ctx_s { edata_t *edata; }; -extern emap_t emap_global; - bool emap_init(emap_t *emap, base_t *base, bool zeroed); /* diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8b2db18..f5fd812 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -47,10 +47,10 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, +edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 82676ee..b216412 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" enum pa_decay_purge_setting_e { @@ -140,6 +141,9 @@ struct pa_shard_s { decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + /* The emap this shard is tied to. 
*/ + emap_t *emap; + /* The base from which we get the ehooks and allocate metadat. */ base_t *base; }; @@ -171,9 +175,10 @@ pa_shard_ehooks_get(pa_shard_t *shard) { } /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, + unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, + nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); + /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). diff --git a/src/arena.c b/src/arena.c index c70b128..2e70308 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,8 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; +emap_t arena_emap_global; + const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, @@ -668,7 +670,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); @@ -1064,11 +1066,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); prof_idump_rollback(tsdn, usize); @@ -1081,7 +1083,7 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(ptr != NULL); edata_szind_set(edata, SC_NBINS); - emap_remap(tsdn, &emap_global, edata, SC_NBINS, /* slab */ false); + emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); @@ -1094,7 +1096,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1223,7 +1225,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1237,7 +1239,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. 
*/ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1271,7 +1273,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == emap_edata_lookup(tsdn, &emap_global, ptr)); + assert(edata == emap_edata_lookup(tsdn, &arena_emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; @@ -1491,7 +1493,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_t cur_time; nstime_init_update(&cur_time); - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), &cur_time, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { diff --git a/src/ctl.c b/src/ctl.c index 00fd744..7555267 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2650,7 +2650,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index ff459df..1016c3e 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); - edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/emap.c b/src/emap.c index c79dafa..24d6121 100644 --- a/src/emap.c +++ b/src/emap.c @@ -3,8 +3,6 @@ #include "jemalloc/internal/emap.h" -emap_t emap_global; - /* * Note: Ends without at semicolon, so that * EMAP_DECLARE_RTREE_CTX; diff --git a/src/extent.c b/src/extent.c index db658bb..ae0aa2c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,11 +19,11 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, +static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, 
edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, +static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ @@ -36,14 +36,14 @@ static atomic_zu_t highpages; * definition. */ -static void extent_deregister(tsdn_t *tsdn, edata_t *edata); +static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained); +static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, @@ -53,12 +53,13 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, /******************************************************************************/ static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, edata_cache, ehooks, ecache, edata, - &coalesced, false); + edata = extent_try_coalesce(tsdn, shard, edata_cache, ehooks, ecache, + edata, &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -156,8 +157,8 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, &shard->edata_cache, - ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, shard, + &shard->edata_cache, ehooks, ecache, edata)) { break; } /* @@ -178,7 +179,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); break; default: not_reached(); @@ -278,26 +279,27 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { } static bool -extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool gdump_add) { /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - emap_lock_edata(tsdn, &emap_global, edata); + emap_lock_edata(tsdn, shard->emap, edata); szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - if (emap_register_boundary(tsdn, &emap_global, edata, szind, slab)) { - emap_unlock_edata(tsdn, &emap_global, edata); + if (emap_register_boundary(tsdn, shard->emap, edata, szind, slab)) { + emap_unlock_edata(tsdn, shard->emap, edata); return true; } if (slab) { - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } - emap_unlock_edata(tsdn, &emap_global, edata); + emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -307,18 +309,18 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { } static bool -extent_register(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, true); +extent_register(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + return extent_register_impl(tsdn, shard, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, false); +extent_register_no_gdump_add(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + return extent_register_impl(tsdn, shard, edata, false); } static void -extent_reregister(tsdn_t *tsdn, edata_t *edata) { - bool err = extent_register(tsdn, edata); +extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + bool err = extent_register(tsdn, shard, edata); assert(!err); } @@ -326,14 +328,15 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, &emap_global, edata); - emap_deregister_boundary(tsdn, &emap_global, edata); +extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool gdump) { + emap_lock_edata(tsdn, shard->emap, edata); + emap_deregister_boundary(tsdn, shard->emap, edata); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, edata); + emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } - emap_unlock_edata(tsdn, &emap_global, edata); + emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -341,13 +344,14 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { } static void -extent_deregister(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, true); +extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + extent_deregister_impl(tsdn, shard, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, false); +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, + edata_t *edata) { + extent_deregister_impl(tsdn, shard, edata, false); } /* @@ -380,7 +384,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, &emap_global, new_addr, + edata = emap_lock_edata_from_addr(tsdn, shard->emap, new_addr, false); if (edata != NULL) { /* @@ -395,7 +399,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, &emap_global, unlock_edata); + emap_unlock_edata(tsdn, shard->emap, 
unlock_edata); } } else { /* @@ -478,9 +482,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &shard->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, - slab, growing_retained); + *edata = extent_split_impl(tsdn, shard, &shard->edata_cache, + ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, + szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -490,9 +494,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &shard->edata_cache, ehooks, - *edata, size, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, shard, &shard->edata_cache, + ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, + false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -504,7 +508,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (leadsize == 0 && trailsize == 0) { edata_szind_set(*edata, szind); - emap_remap(tsdn, &emap_global, *edata, szind, slab); + emap_remap(tsdn, shard->emap, *edata, szind, slab); } return extent_split_interior_ok; @@ -555,14 +559,14 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ assert(result == extent_split_interior_error); if (to_salvage != NULL) { - extent_deregister(tsdn, to_salvage); + extent_deregister(tsdn, shard, to_salvage); } if (to_leak != NULL) { void *leak = edata_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, &emap_global, + assert(emap_lock_edata_from_addr(tsdn, shard->emap, leak, false) == NULL); } return NULL; @@ -614,7 +618,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } if (*zero) { @@ -681,7 +685,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); - if (extent_register_no_gdump_add(tsdn, edata)) { + if (extent_register_no_gdump_add(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } @@ -725,7 +729,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &shard->ecache_retained, to_salvage, true); } if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, &shard->ecache_retained, to_leak, true); } @@ -769,7 +773,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } if (slab) { edata_slab_set(edata, true); - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); @@ -834,7 +838,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size, slab, szind, pa_shard_extent_sn_next(shard), extent_state_active, *zero, *commit, /* ranged */ false, 
EXTENT_NOT_HEAD); - if (extent_register(tsdn, edata)) { + if (extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } @@ -864,15 +868,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, + bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, ehooks, edata_cache, + bool err = extent_merge_impl(tsdn, shard, ehooks, edata_cache, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -884,9 +888,10 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained, bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained, + bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -901,7 +906,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, + edata_t *next = emap_lock_edata_from_addr(tsdn, shard->emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -912,10 +917,10 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, &emap_global, next); + emap_unlock_edata(tsdn, shard->emap, next); - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, next, true, + if (can_coalesce && !extent_coalesce(tsdn, shard, + edata_cache, ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -927,15 +932,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } /* Try to coalesce backward. 
*/ - edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, + edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, &emap_global, prev); + emap_unlock_edata(tsdn, shard->emap, prev); - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, prev, false, + if (can_coalesce && !extent_coalesce(tsdn, shard, + edata_cache, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { @@ -955,18 +960,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, - edata, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, + ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, - edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, + ecache, edata, coalesced, growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ @@ -1007,22 +1013,22 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, edata); + emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } - emap_assert_mapped(tsdn, &emap_global, edata); + emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &shard->edata_cache, ehooks, - ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, shard, &shard->edata_cache, + ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, + edata = extent_try_coalesce_large(tsdn, shard, &shard->edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); @@ -1045,7 +1051,7 @@ extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, edata)) { + if (extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return; } @@ -1088,11 +1094,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. 
*/ - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { return; } - extent_reregister(tsdn, edata); + extent_reregister(tsdn, shard, edata); } /* Try to decommit; purge if that fails. */ @@ -1131,7 +1137,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); edata_addr_set(edata, edata_base_get(edata)); @@ -1213,9 +1219,10 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1235,13 +1242,13 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_zeroed_get(edata), edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, + bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, &emap_global, edata, trail); + emap_lock_edata2(tsdn, shard->emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1252,14 +1259,14 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_size_set(edata, size_a); edata_szind_set(edata, szind_a); - emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); - emap_unlock_edata2(tsdn, &emap_global, edata, trail); + emap_unlock_edata2(tsdn, shard->emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, &emap_global, edata, trail); + emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: edata_cache_put(tsdn, edata_cache, trail); label_error_a: @@ -1267,16 +1274,16 @@ label_error_a: } edata_t * -extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, - szind_a, slab_a, size_b, szind_b, slab_b, false); +extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, shard, edata_cache, ehooks, edata, + size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } static bool -extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b, bool 
growing_retained) { +extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); @@ -1298,9 +1305,9 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); + emap_merge_prepare(tsdn, shard->emap, &prepare, a, b); - emap_lock_edata2(tsdn, &emap_global, a, b); + emap_lock_edata2(tsdn, shard->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_szind_set(a, SC_NSIZES); @@ -1308,8 +1315,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, &emap_global, &prepare, a, b); - emap_unlock_edata2(tsdn, &emap_global, a, b); + emap_merge_commit(tsdn, shard->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, shard->emap, a, b); edata_cache_put(tsdn, edata_cache, b); @@ -1317,9 +1324,9 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, } bool -extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); +extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b) { + return extent_merge_impl(tsdn, shard, ehooks, edata_cache, a, b, false); } bool diff --git a/src/inspect.c b/src/inspect.c index 6c4dd8a..5e8d51d 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 72eb55b..0be5549 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1623,7 +1623,7 @@ malloc_init_hard_a0_locked() { return true; } /* emap_global is static, hence zeroed. 
*/ - if (emap_init(&emap_global, b0get(), /* zeroed */ true)) { + if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) { return true; } if (extent_boot()) { @@ -2645,7 +2645,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2699,12 +2700,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_debug) { emap_alloc_ctx_t dbg_ctx; emap_alloc_ctx_lookup(tsd_tsdn(tsd), - &emap_global, ptr, &dbg_ctx); + &arena_emap_global, ptr, &dbg_ctx); assert(dbg_ctx.szind == alloc_ctx.szind); assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { @@ -2781,8 +2782,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_ctx_try_lookup_fast(tsd, &emap_global, - ptr, &alloc_ctx); + bool res = emap_alloc_ctx_try_lookup_fast(tsd, + &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ if (unlikely(!res || !alloc_ctx.slab)) { @@ -3238,7 +3239,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3510,11 +3512,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). */ - edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - ptr); + edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), + &arena_emap_global, ptr); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3547,7 +3550,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). */ - assert(emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr) + assert(emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr) == old_edata); if (unlikely(usize == old_usize)) { diff --git a/src/large.c b/src/large.c index 494a32b..31205df 100644 --- a/src/large.c +++ b/src/large.c @@ -202,7 +202,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. 
*/ diff --git a/src/pa.c b/src/pa.c index 2809630..2a581ef 100644 --- a/src/pa.c +++ b/src/pa.c @@ -13,9 +13,9 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { } bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, + unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, + nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -68,6 +68,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->emap = emap; shard->base = base; return false; @@ -175,8 +176,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, - trail)) { + if (extent_merge_wrapper(tsdn, shard, ehooks, &shard->edata_cache, + edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); return true; } @@ -186,7 +187,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } pa_nactive_add(shard, expand_amount >> LG_PAGE); edata_szind_set(edata, szind); - emap_remap(tsdn, &emap_global, edata, szind, slab); + emap_remap(tsdn, shard->emap, edata, szind, slab); return false; } @@ -205,8 +206,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, - edata, new_size, szind, slab, shrink_amount, SC_NSIZES, + edata_t *trail = extent_split_wrapper(tsdn, shard, &shard->edata_cache, + ehooks, edata, new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { return true; diff --git a/src/prof.c b/src/prof.c index e68694a..bbf8e9d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -229,7 +229,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, prof_fetch_sys_thread_name(tsd); } - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index 9afc006..d345354 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -125,7 +125,7 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, size_t szind_sum = binind * nflush; for (unsigned i = 0; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; @@ -185,7 +185,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { for (unsigned i = 0 ; i < nflush; i++) { item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, cache_bin_ptr_array_get(&ptrs, i)); + &arena_emap_global, + cache_bin_ptr_array_get(&ptrs, i)); } } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a7a23f7..a2cf3e5 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t 
vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, - &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, + ptr, &full_alloc_ctx); if (missing) { return 0; } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 243a9b3..040ea54 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); expect_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = emap_edata_lookup(tsdn, &emap_global, ptr2); + edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); expect_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 19ff15f..4aa9f9e 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -103,7 +103,7 @@ TEST_END static void confirm_malloc(void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); - edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); + edata_t *e = emap_edata_lookup(TSDN_NULL, &arena_emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); -- cgit v0.12 From 1a1124462e8c671809535a3dd617f08252a48ce5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 18:10:29 -0700 Subject: PA: Take zero as a bool rather than as a bool *. Now that we've moved junking to a higher level of the allocation stack, we don't care about this performance optimization (which only occurred in debug modes). --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/pa.h | 4 ++-- src/arena.c | 6 ++---- src/large.c | 26 ++------------------------ src/pa.c | 16 ++++++++-------- 5 files changed, 15 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9fea729..6e0fe2b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -33,7 +33,7 @@ void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool *zero); + size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b216412..3e9f1c2 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -195,10 +195,10 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool *zero); + size_t alignment, bool slab, szind_t szind, bool zero); /* Returns true on error, in which case nothing changed. 
*/ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero); + size_t new_size, szind_t szind, bool slab, bool zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. diff --git a/src/arena.c b/src/arena.c index 2e70308..b983b63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -347,7 +347,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero) { + size_t alignment, bool zero) { szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; @@ -736,10 +736,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool zero = false; - edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, &zero); + PAGE, /* slab */ true, /* szind */ binind, /* zero */ false); if (slab == NULL) { return NULL; diff --git a/src/large.c b/src/large.c index 31205df..80de716 100644 --- a/src/large.c +++ b/src/large.c @@ -23,7 +23,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { size_t ausize; edata_t *edata; - bool is_zeroed; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); assert(!tsdn_null(tsdn) || arena != NULL); @@ -36,17 +35,11 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (config_fill && unlikely(opt_zero)) { zero = true; } - /* - * Copy zero into is_zeroed and pass the copy when allocating the - * extent, so that it is possible to make correct zero fill decisions - * below, even if is_zeroed ends up true when zero is false. - */ - is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, - arena, usize, alignment, &is_zeroed)) == NULL) { + arena, usize, alignment, zero)) == NULL) { return NULL; } @@ -58,10 +51,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->large_mtx); } - if (zero) { - assert(is_zeroed); - } - arena_decay_tick(tsdn, arena); return edata_addr_get(edata); } @@ -99,23 +88,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); - if (config_fill && unlikely(opt_zero)) { - zero = true; - } - size_t old_size = edata_size_get(edata); size_t old_usize = edata_usize_get(edata); size_t new_size = usize + sz_large_pad; - /* - * Copy zero into is_zeroed_trail and pass the copy when allocating the - * extent, so that it is possible to make correct zero fill decisions - * below, even if is_zeroed_trail ends up true when zero is false. 
- */ - bool is_zeroed_trail = zero; szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, &is_zeroed_trail); + szind, /* slab */ false, zero); if (err) { return true; } @@ -137,7 +116,6 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, assert(nzero > 0); memset(zbase, 0, nzero); } - assert(is_zeroed_trail); } arena_extent_ralloc_large_expand(tsdn, arena, edata, old_usize); diff --git a/src/pa.c b/src/pa.c index 2a581ef..04762a0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -112,7 +112,7 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero) { + bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -121,16 +121,16 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, NULL, size, alignment, slab, szind, - zero); + &zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, zero); + NULL, size, alignment, slab, szind, &zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, - szind, zero); + szind, &zero); mapped_add = size; } if (edata != NULL) { @@ -145,7 +145,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero) { + size_t new_size, szind_t szind, bool slab, bool zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -161,16 +161,16 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - zero); + &zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, zero); + SC_NSIZES, &zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, zero); + /* slab */ false, SC_NSIZES, &zero); mapped_add = expand_amount; } if (trail == NULL) { -- cgit v0.12 From 11c47cb1336491b7f4d21f12eaba45a10af639c3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 18:19:19 -0700 Subject: Extent: Take "bool zero" over "bool *zero". 
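Together with the previous patch, this changes the caller-visible contract: instead of passing a bool * and reading it back to learn whether the extent happened to come back already zeroed, a caller now simply states whether it needs zeroed memory, and the extent layer zeroes internally when required. A hedged before/after sketch of a call site follows, using the ecache_alloc signatures shown in these two patches; the surrounding tsdn/shard/ehooks/size/alignment/szind variables are assumed to exist as in pa_alloc, and the two snippets are alternatives (pre- and post-patch), not code meant to coexist.

	/*
	 * Before: zero is an in/out parameter; the caller inspects it
	 * afterwards to decide whether it still has to clear the memory.
	 */
	bool zero = false;
	edata_t *edata = ecache_alloc(tsdn, shard, ehooks,
	    &shard->ecache_dirty, NULL, size, alignment, /* slab */ false,
	    szind, &zero);

	/*
	 * After: zero is a plain request flag; any required zeroing happens
	 * inside the extent layer before the edata is returned.
	 */
	edata_t *edata = ecache_alloc(tsdn, shard, ehooks,
	    &shard->ecache_dirty, NULL, size, alignment, /* slab */ false,
	    szind, /* zero */ true);
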
--- include/jemalloc/internal/extent.h | 6 +++--- src/extent.c | 34 ++++++++++++++-------------------- src/pa.c | 13 ++++++------- 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f5fd812..9db650f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,10 +21,10 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero); + szind_t szind, bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero); + szind_t szind, bool zero); void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -32,7 +32,7 @@ edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit); + bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index ae0aa2c..8cc0447 100644 --- a/src/extent.c +++ b/src/extent.c @@ -39,7 +39,7 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, + size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, @@ -48,7 +48,7 @@ static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); + szind_t szind, bool zero, bool *commit); /******************************************************************************/ @@ -72,7 +72,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_t * ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero) { + szind_t szind, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -88,7 +88,7 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero) { + szind_t szind, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -581,11 +581,11 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * 
extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { + szind_t szind, bool zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); - assert(!*zero || !slab); + assert(!zero || !slab); edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, growing_retained); @@ -611,9 +611,6 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (edata_committed_get(edata)) { *commit = true; } - if (edata_zeroed_get(edata)) { - *zero = true; - } assert(edata_state_get(edata) == extent_state_active); if (slab) { @@ -621,7 +618,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_register_interior(tsdn, shard->emap, edata, szind); } - if (*zero) { + if (zero) { void *addr = edata_base_get(edata); if (!edata_zeroed_get(edata)) { size_t size = edata_size_get(edata); @@ -639,9 +636,9 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { + bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); - assert(!*zero || !slab); + assert(!zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -690,9 +687,6 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, goto label_err; } - if (edata_zeroed_get(edata) && edata_committed_get(edata)) { - *zero = true; - } if (edata_committed_get(edata)) { *commit = true; } @@ -775,7 +769,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_slab_set(edata, true); emap_register_interior(tsdn, shard->emap, edata, szind); } - if (*zero && !edata_zeroed_get(edata)) { + if (zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); @@ -790,7 +784,7 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { + bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); @@ -819,7 +813,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + szind_t szind, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -829,14 +823,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - zero, commit); + &zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, size, slab, szind, pa_shard_extent_sn_next(shard), - extent_state_active, *zero, *commit, /* ranged */ false, + extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if 
(extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); diff --git a/src/pa.c b/src/pa.c index 04762a0..b4a1e5b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,17 +120,16 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, slab, szind, - &zero); + &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, &zero); + NULL, size, alignment, slab, szind, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, - szind, &zero); + szind, zero); mapped_add = size; } if (edata != NULL) { @@ -161,16 +160,16 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - &zero); + zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, &zero); + SC_NSIZES, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, &zero); + /* slab */ false, SC_NSIZES, zero); mapped_add = expand_amount; } if (trail == NULL) { -- cgit v0.12 From a4759a1911a6dbb5709302ab5ba94cc1b6322e63 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 09:14:29 -0700 Subject: Ehooks: avoid touching arena_emap_global in tests. That breaks our ability to test custom emaps in isolation. --- src/ehooks.c | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 1016c3e..f2525e1 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -187,35 +187,6 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_split_impl(); } -static inline bool -ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); - return edata_sn_comp(a, b) == 0; -} - -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -static bool -ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, - bool head_b) { - /* If b is a head extent, disallow the cross-region merge. */ - if (head_b) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. 
- */ - assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); - return true; - } - assert(ehooks_same_sn(tsdn, addr_a, addr_b) || (have_dss && - (extent_in_dss(addr_a) || extent_in_dss(addr_b)))); - - return false; -} - bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { @@ -238,8 +209,11 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, if (!maps_coalesce && !opt_retain) { return true; } - if (opt_retain && ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, - head_b)) { + /* + * Don't merge across mappings when retain is on -- this preserves + * first-fit ordering. + */ + if (opt_retain && head_b) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { -- cgit v0.12 From 93b99dd14054886f3d25305b08b8c0f75f289fc4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 09:53:09 -0700 Subject: Extent: Stop passing an edata_cache everywhere. We already pass the pa_shard_t around everywhere; we can just use that. --- include/jemalloc/internal/extent.h | 6 +-- src/extent.c | 103 +++++++++++++++++-------------------- src/pa.c | 8 ++- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 9db650f..bec21d6 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,10 +48,10 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b); + edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/extent.c b/src/extent.c index 8cc0447..889857e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,11 +20,11 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. 
*/ static atomic_zu_t curpages; @@ -42,8 +42,8 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained); + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, @@ -54,11 +54,10 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, shard, edata_cache, ehooks, ecache, + edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); @@ -157,8 +156,8 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, shard, - &shard->edata_cache, ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, shard, ehooks, ecache, + edata)) { break; } /* @@ -482,9 +481,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, shard, &shard->edata_cache, - ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, - szind, slab, growing_retained); + *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, + SC_NSIZES, false, size + trailsize, szind, slab, + growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -494,9 +493,8 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, shard, &shard->edata_cache, - ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, - false, growing_retained); + *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -862,15 +860,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, - bool forward, bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, + bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, shard, ehooks, edata_cache, + bool err = extent_merge_impl(tsdn, shard, ehooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -882,9 +880,8 @@ extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained, +extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -914,7 +911,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, emap_unlock_edata(tsdn, shard->emap, next); if (can_coalesce && !extent_coalesce(tsdn, shard, - edata_cache, ehooks, ecache, edata, next, true, + ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -934,7 +931,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, emap_unlock_edata(tsdn, shard->emap, prev); if (can_coalesce && !extent_coalesce(tsdn, shard, - edata_cache, ehooks, ecache, edata, prev, false, + ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { @@ -954,19 +951,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, - ecache, edata, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, - ecache, edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + coalesced, growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ @@ -1014,17 +1009,16 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, shard, &shard->edata_cache, - ehooks, ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, + NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, shard, - &shard->edata_cache, ehooks, ecache, edata, - &coalesced, growing_retained); + edata = extent_try_coalesce_large(tsdn, shard, ehooks, + ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && pa_shard_may_force_decay(shard)) { @@ -1213,10 +1207,9 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, + szind_t szind_b, bool slab_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1225,7 +1218,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, return NULL; } - edata_t *trail = edata_cache_get(tsdn, edata_cache); + edata_t *trail = edata_cache_get(tsdn, &shard->edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1262,22 +1255,22 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, label_error_c: emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, edata_cache, trail); + edata_cache_put(tsdn, &shard->edata_cache, trail); label_error_a: return NULL; } edata_t * extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, shard, edata_cache, ehooks, edata, - size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, shard, ehooks, edata, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); } static bool -extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, + edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(edata_base_get(a) < edata_base_get(b)); @@ -1312,15 +1305,15 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_merge_commit(tsdn, shard->emap, &prepare, a, b); emap_unlock_edata2(tsdn, shard->emap, a, b); - edata_cache_put(tsdn, edata_cache, b); + edata_cache_put(tsdn, &shard->edata_cache, b); return false; } bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, shard, ehooks, edata_cache, a, b, false); + edata_t *a, edata_t *b) { + return extent_merge_impl(tsdn, shard, ehooks, a, b, false); } bool diff --git a/src/pa.c b/src/pa.c index b4a1e5b..78ff348 100644 --- a/src/pa.c +++ b/src/pa.c @@ -175,8 +175,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, shard, ehooks, &shard->edata_cache, - edata, trail)) { + if (extent_merge_wrapper(tsdn, shard, ehooks, edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); return true; } @@ -205,9 +204,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, shard, &shard->edata_cache, - ehooks, edata, new_size, szind, slab, shrink_amount, SC_NSIZES, - false); + edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, + new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { return true; } -- cgit v0.12 From dc26b3009450aadaffdf2f3e91ff5c41548796d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 15:49:42 -0700 Subject: Rtree: Clean up compact/non-compact split. --- include/jemalloc/internal/rtree.h | 209 +++++++++++++------------------------- src/emap.c | 24 ++--- test/unit/rtree.c | 6 +- 3 files changed, 83 insertions(+), 156 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 1c2715d..46c58f9 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -43,13 +43,18 @@ struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; -typedef struct rtree_leaf_elm_contents_s rtree_leaf_elm_contents_t; -struct rtree_leaf_elm_contents_s { - edata_t *edata; +typedef struct rtree_metadata_s rtree_metadata_t; +struct rtree_metadata_s { szind_t szind; bool slab; }; +typedef struct rtree_contents_s rtree_contents_t; +struct rtree_contents_s { + edata_t *edata; + rtree_metadata_t metadata; +}; + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -67,8 +72,11 @@ struct rtree_leaf_elm_s { atomic_p_t le_bits; #else atomic_p_t le_edata; /* (edata_t *) */ - atomic_u_t le_szind; /* (szind_t) */ - atomic_b_t le_slab; /* (bool) */ + /* + * slab is stored in the low bit; szind is stored in the next lowest + * bits. 
+ */ + atomic_u_t le_metadata; #endif }; @@ -171,25 +179,25 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_encode(rtree_leaf_elm_contents_t contents) { +rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); - uintptr_t szind_bits = (uintptr_t)contents.szind << LG_VADDR; + uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; /* * Slab shares the low bit of edata; we know edata is on an even address * (in fact, it's 128 bytes on 64-bit systems; we can enforce this * alignment if we want to steal 6 extra rtree leaf bits someday. */ - uintptr_t slab_bits = (uintptr_t)contents.slab; + uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; return szind_bits | edata_bits | slab_bits; } -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_contents_t +JEMALLOC_ALWAYS_INLINE rtree_contents_t rtree_leaf_elm_bits_decode(uintptr_t bits) { - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; /* Do the easy things first. */ - contents.szind = bits >> LG_VADDR; - contents.slab = (bool)(bits & 1); + contents.metadata.szind = bits >> LG_VADDR; + contents.metadata.slab = (bool)(bits & 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -210,109 +218,42 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { # endif /* RTREE_LEAF_COMPACT */ -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { +JEMALLOC_ALWAYS_INLINE rtree_contents_t +rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.edata; + rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents; #else - edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent + rtree_contents_t contents; + unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); - return edata; -#endif -} - -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.szind; -#else - return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED - : ATOMIC_ACQUIRE); -#endif -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.slab; -#else - return atomic_load_b(&elm->le_slab, dependent ? 
ATOMIC_RELAXED : - ATOMIC_ACQUIRE); -#endif -} - -static inline void -rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, edata_t *edata) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.edata = edata; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); -#endif -} + contents.metadata.slab = (bool)(metadata_bits & 1); + contents.metadata.szind = (metadata_bits >> 1); -static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind) { - assert(szind <= SC_NSIZES); - -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.szind = szind; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); -#endif -} + contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); -static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool slab) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.slab = slab; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); + return contents; #endif } static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_leaf_elm_contents_t contents) { + rtree_leaf_elm_t *elm, rtree_contents_t contents) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + unsigned metadata_bits = ((unsigned)contents.metadata.slab + | ((unsigned)contents.metadata.szind << 1)); + atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid * as soon as the edata field is non-NULL. */ - rtree_leaf_elm_edata_write(tsdn, rtree, elm, edata); + atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE); #endif } @@ -320,13 +261,15 @@ static inline void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind, bool slab) { assert(!slab || szind < SC_NBINS); - + rtree_contents_t contents = rtree_leaf_elm_read( + tsdn, rtree, elm, /* dependent */ true); /* * The caller implicitly assures that it is the only writer to the szind * and slab fields, and that the edata field cannot currently change. 
*/ - rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + contents.metadata.slab = slab; + contents.metadata.szind = szind; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -400,11 +343,11 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return true; } - assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_contents_t contents; + assert(rtree_leaf_elm_read(tsdn, rtree, elm, false).edata == NULL); + rtree_contents_t contents; contents.edata = edata; - contents.szind = szind; - contents.slab = slab; + contents.metadata.szind = szind; + contents.metadata.slab = slab; rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; @@ -430,7 +373,7 @@ rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NULL; } - return rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).edata; } JEMALLOC_ALWAYS_INLINE szind_t @@ -441,7 +384,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return SC_NSIZES; } - return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).metadata.szind; } /* @@ -458,18 +401,12 @@ rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, if (!dependent && elm == NULL) { return true; } -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + dependent); *r_edata = contents.edata; - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); -#endif + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; + return false; } @@ -495,22 +432,16 @@ rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); elm = &leaf[subkey]; -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, - elm, true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - bits); - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); -#endif + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, + elm, /* dependent */ true); + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; return true; } else { return false; } } + JEMALLOC_ALWAYS_INLINE bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { @@ -519,15 +450,11 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, 
dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); -#endif + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true); + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; + return false; } @@ -544,12 +471,12 @@ static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != - NULL); - rtree_leaf_elm_contents_t contents; + assert(rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ false).edata != NULL); + rtree_contents_t contents; contents.edata = NULL; - contents.szind = SC_NSIZES; - contents.slab = false; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/emap.c b/src/emap.c index 24d6121..0d10c79 100644 --- a/src/emap.c +++ b/src/emap.c @@ -65,12 +65,12 @@ emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, static inline emap_lock_result_t emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, - elm, true); + edata_t *edata1 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true).edata; /* Slab implies active extents and should be skipped. */ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &emap->rtree, elm, true))) { + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_read(tsdn, + &emap->rtree, elm, /* dependent */ true).metadata.slab)) { return emap_lock_result_no_extent; } @@ -79,8 +79,8 @@ emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, * the leaf->edata mapping. We have to recheck while holding the lock. 
*/ emap_lock_edata(tsdn, emap, edata1); - edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, elm, - true); + edata_t *edata2 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true).edata; if (edata1 == edata2) { *result = edata1; @@ -137,10 +137,10 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; contents.edata = edata; - contents.szind = szind; - contents.slab = slab; + contents.metadata.szind = szind; + contents.metadata.slab = slab; rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -278,10 +278,10 @@ emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { - rtree_leaf_elm_contents_t clear_contents; + rtree_contents_t clear_contents; clear_contents.edata = NULL; - clear_contents.szind = SC_NSIZES; - clear_contents.slab = false; + clear_contents.metadata.szind = SC_NSIZES; + clear_contents.metadata.slab = false; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 01e710c..c116420 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -137,10 +137,10 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; contents.edata = &edata; - contents.szind = SC_NSIZES; - contents.slab = false; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, -- cgit v0.12 From 50289750b369e50265b1f74fa3dd895552b30615 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 18:55:43 -0700 Subject: Extent: Remove szind/slab knowledge. --- include/jemalloc/internal/emap.h | 5 +- include/jemalloc/internal/extent.h | 12 ++-- include/jemalloc/internal/pa.h | 4 +- include/jemalloc/internal/rtree.h | 4 -- src/extent.c | 132 +++++++++++++------------------------ src/large.c | 5 +- src/pa.c | 38 +++++++---- 7 files changed, 81 insertions(+), 119 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9f814ce..5fc713d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -28,6 +28,9 @@ struct emap_full_alloc_ctx_s { bool emap_init(emap_t *emap, base_t *base, bool zeroed); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, + bool slab); + /* * Grab the lock or locks associated with the edata or edatas indicated (which * is done just by simple address hashing). The hashing strategy means that @@ -106,8 +109,6 @@ struct emap_prepare_s { * higher-addressed one. It's the caller's responsibility to set the edata * state appropriately. 
*/ -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab); bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index bec21d6..2f14b81 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,19 +20,16 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit); + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -48,8 +45,7 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3e9f1c2..172c549 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -198,13 +198,13 @@ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool zero); + size_t new_size, szind_t szind, bool zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. */ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *generated_dirty); + size_t new_size, szind_t szind, bool *generated_dirty); /* * Frees the given edata back to the pa. 
Sets *generated_dirty if we produced * new dirty pages (well, we alwyas set it for now; but this need not be the diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 46c58f9..3b21f17 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -334,16 +334,12 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, edata_t *edata, szind_t szind, bool slab) { - /* Use rtree_clear() to set the edata to NULL. */ - assert(edata != NULL); - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); if (elm == NULL) { return true; } - assert(rtree_leaf_elm_read(tsdn, rtree, elm, false).edata == NULL); rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; diff --git a/src/extent.c b/src/extent.c index 889857e..671699c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,8 +20,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained); @@ -39,16 +38,15 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, - bool growing_retained); + size_t alignment, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit); + ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, + bool *commit); /******************************************************************************/ @@ -70,8 +68,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_t * ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -79,15 +77,15 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, slab, szind, zero, &commit, false); + new_addr, size, alignment, zero, &commit, false); assert(edata == NULL || !edata_ranged_get(edata)); return edata; } edata_t * 
ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -95,7 +93,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, - size, alignment, slab, szind, zero, &commit); + size, alignment, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -107,7 +105,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, - size, alignment, slab, szind, zero, &commit); + size, alignment, zero, &commit); } assert(edata == NULL || !edata_ranged_get(edata)); @@ -286,18 +284,12 @@ extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, */ emap_lock_edata(tsdn, shard->emap, edata); - szind_t szind = edata_szind_get_maybe_invalid(edata); - bool slab = edata_slab_get(edata); - - if (emap_register_boundary(tsdn, shard->emap, edata, szind, slab)) { + if (emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, + /* slab */ false)) { emap_unlock_edata(tsdn, shard->emap, edata); return true; } - if (slab) { - emap_register_interior(tsdn, shard->emap, edata, szind); - } - emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump_add) { @@ -331,10 +323,6 @@ extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, shard->emap, edata); emap_deregister_boundary(tsdn, shard->emap, edata); - if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); - } emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump) { @@ -359,7 +347,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -463,8 +451,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool growing_retained) { + void *new_addr, size_t size, size_t alignment, bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); @@ -482,8 +469,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, - SC_NSIZES, false, size + trailsize, szind, slab, - growing_retained); + size + trailsize, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -494,7 +480,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); + trailsize, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -504,11 +490,6 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } } - if (leadsize == 0 && trailsize == 0) { - edata_szind_set(*edata, szind); - emap_remap(tsdn, shard->emap, *edata, szind, slab); - } - return extent_split_interior_ok; } @@ -520,8 +501,8 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, edata_t *edata, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -529,7 +510,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, alignment, slab, szind, growing_retained); + new_addr, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -578,21 +559,18 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero, + bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(new_addr == NULL || !slab); - assert(!zero || !slab); - edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, slab, growing_retained); + new_addr, size, alignment, growing_retained); if (edata == NULL) { return NULL; } edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, - size, alignment, slab, szind, edata, growing_retained); + size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -611,10 +589,6 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } assert(edata_state_get(edata) == extent_state_active); - if (slab) { - edata_slab_set(edata, slab); - emap_register_interior(tsdn, shard->emap, edata, szind); - } if (zero) { void *addr = edata_base_get(edata); @@ -633,10 +607,8 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit) { + size_t size, size_t alignment, bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); - assert(!zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ @@ -696,7 +668,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior(tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment, slab, szind, true); + size, alignment, /* growing_retained */ true); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -763,10 +735,6 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); } - if (slab) { - edata_slab_set(edata, true); - emap_register_interior(tsdn, shard->emap, edata, szind); - } if (zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); @@ -781,16 +749,15 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->ecache_retained, new_addr, size, alignment, slab, - szind, zero, commit, true); + &shard->ecache_retained, new_addr, size, alignment, zero, commit, + /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { @@ -798,7 +765,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } } else if (opt_retain && new_addr == NULL) { edata = extent_grow_retained(tsdn, shard, ehooks, size, - alignment, slab, szind, zero, commit); + alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); @@ -810,8 +777,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -827,7 +793,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, - size, slab, szind, pa_shard_extent_sn_next(shard), + size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { @@ -989,7 +955,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* * Does the metadata management portions of putting an unused extent into the - * given ecache_t (coalesces, deregisters slab interiors, the heap operations). + * given ecache_t (coalesces and inserts into the eset). 
*/ static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -1000,12 +966,6 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - edata_szind_set(edata, SC_NSIZES); - if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); - } - emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { @@ -1208,8 +1168,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ static edata_t * extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, - szind_t szind_b, bool slab_b, bool growing_retained) { + edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1225,12 +1184,14 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_init(trail, edata_arena_ind_get(edata), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_ranged_get(edata), EXTENT_NOT_HEAD); + /* slab */ false, SC_NSIZES, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), edata_ranged_get(edata), + EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, - size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); + size_a, SC_NSIZES, /* slab */ false, trail, size_b, SC_NSIZES, + /* slab */ false); if (err) { goto label_error_b; } @@ -1245,9 +1206,8 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - edata_szind_set(edata, szind_a); - emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, szind_a, - slab_a, trail, size_b, szind_b, slab_b); + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, SC_NSIZES, + /* slab_a */ false, trail, size_b,SC_NSIZES, /* slab_b */ false); emap_unlock_edata2(tsdn, shard->emap, edata, trail); @@ -1262,10 +1222,9 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, shard, ehooks, edata, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b) { + return extent_split_impl(tsdn, shard, ehooks, edata, size_a, size_b, + /* growing_retained */ false); } static bool @@ -1297,7 +1256,6 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, emap_lock_edata2(tsdn, shard->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_szind_set(a, SC_NSIZES); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); diff --git a/src/large.c b/src/large.c index 80de716..d97009a 100644 --- a/src/large.c +++ b/src/large.c @@ -70,8 +70,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { bool generated_dirty; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, - usize + sz_large_pad, sz_size2index(usize), false, - &generated_dirty); + usize + sz_large_pad, sz_size2index(usize), &generated_dirty); if (err) { return true; } @@ -94,7 +93,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, zero); + szind, zero); if (err) { return true; } diff --git a/src/pa.c b/src/pa.c index 78ff348..a7fe70f 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,16 +120,15 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); + &shard->ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, zero); + NULL, size, alignment, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, NULL, size, alignment, slab, - szind, zero); + &shard->ecache_retained, NULL, size, alignment, zero); mapped_add = size; } if (edata != NULL) { @@ -138,13 +137,19 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } + emap_remap(tsdn, shard->emap, edata, szind, slab); + edata_szind_set(edata, szind); + edata_slab_set(edata, slab); + if (slab) { + emap_register_interior(tsdn, shard->emap, edata, szind); + } } return edata; } bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool zero) { + size_t new_size, szind_t szind, bool zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -159,17 +164,15 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - zero); + trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, zero); + trail_begin, expand_amount, PAGE, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, zero); + zero); mapped_add = expand_amount; } if (trail == NULL) { @@ -185,13 +188,13 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } pa_nactive_add(shard, expand_amount >> LG_PAGE); edata_szind_set(edata, szind); - emap_remap(tsdn, shard->emap, edata, szind, slab); + emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { + size_t 
new_size, szind_t szind, bool *generated_dirty) { assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -205,7 +208,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, - new_size, szind, slab, shrink_amount, SC_NSIZES, false); + new_size, shrink_amount); if (trail == NULL) { return true; } @@ -213,12 +216,21 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); *generated_dirty = true; + + edata_szind_set(edata, szind); + emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { + emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); + if (edata_slab_get(edata)) { + emap_deregister_interior(tsdn, shard->emap, edata); + edata_slab_set(edata, false); + } + edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); ehooks_t *ehooks = pa_shard_ehooks_get(shard); ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); -- cgit v0.12 From bb6a418523718c40e8f7c14eb677435911eb7a18 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Mar 2020 11:31:38 -0700 Subject: Emap: Drop szind/slab splitting parameters. After the previous diff, these are constants. --- include/jemalloc/internal/emap.h | 6 ++---- src/emap.c | 20 +++++++++++++------- src/extent.c | 7 +++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5fc713d..b7eed84 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -110,11 +110,9 @@ struct emap_prepare_s { * state appropriately. */ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b); + edata_t *edata, size_t size_a, edata_t *trail, size_t size_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b); + edata_t *lead, size_t size_a, edata_t *trail, size_t size_b); void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail); void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, diff --git a/src/emap.c b/src/emap.c index 0d10c79..f7fac01 100644 --- a/src/emap.c +++ b/src/emap.c @@ -230,8 +230,7 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b) { + edata_t *edata, size_t size_a, edata_t *trail, size_t size_b) { EMAP_DECLARE_RTREE_CTX; /* @@ -240,7 +239,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, * facilitate a lookup. 
*/ edata_t lead; - edata_init(&lead, 0U, edata_addr_get(edata), size_a, slab_a, szind_a, 0, + edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, @@ -257,12 +256,19 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b) { + edata_t *lead, size_t size_a, edata_t *trail, size_t size_b) { + /* + * We should think about not writing to the lead leaf element. We can + * get into situations where a racing realloc-like call can disagree + * with a size lookup request. I think it's fine to declare that these + * situations are race bugs, but there's an argument to be made that for + * things like xallocx, a size lookup call should return either the old + * size or the new size, but not anything else. + */ emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, - prepare->lead_elm_b, lead, szind_a, slab_a); + prepare->lead_elm_b, lead, SC_NSIZES, /* slab */ false); emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, - prepare->trail_elm_b, trail, szind_b, slab_b); + prepare->trail_elm_b, trail, SC_NSIZES, /* slab */ false); } void diff --git a/src/extent.c b/src/extent.c index 671699c..073f806 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1190,8 +1190,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, - size_a, SC_NSIZES, /* slab */ false, trail, size_b, SC_NSIZES, - /* slab */ false); + size_a, trail, size_b); if (err) { goto label_error_b; } @@ -1206,8 +1205,8 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, SC_NSIZES, - /* slab_a */ false, trail, size_b,SC_NSIZES, /* slab_b */ false); + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, trail, + size_b); emap_unlock_edata2(tsdn, shard->emap, edata, trail); -- cgit v0.12 From 26e9a3103d443c45e0fbc7e23754fefb12ea181e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 18 Mar 2020 12:04:02 -0700 Subject: PA: Simple decay test. 
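
The new unit test drives the pa_shard_t API directly, without going
through an arena. As context for the diff, the alloc/dalloc/decay cycle
that each worker thread runs is sketched below; it mirrors the loop in
test/unit/pa.c and assumes the pa_* signatures used there (the name of
the final boolean argument to pa_decay_all is an assumption here), so
treat it as an illustration rather than standalone code:

    /* shard/stats stand in for &test_data->shard / &test_data->stats. */
    edata_t *edata = pa_alloc(TSDN_NULL, shard, PAGE, PAGE,
        /* slab */ false, /* szind */ 0, /* zero */ false);
    bool generated_dirty;
    pa_dalloc(TSDN_NULL, shard, edata, &generated_dirty);
    /* dirty_decay_ms == 0, so a decay pass should purge the freed page. */
    malloc_mutex_lock(TSDN_NULL, &shard->decay_dirty.mtx);
    pa_decay_all(TSDN_NULL, shard, &shard->decay_dirty,
        &stats->decay_dirty, &shard->ecache_dirty, /* fully_decay */ true);
    malloc_mutex_unlock(TSDN_NULL, &shard->decay_dirty.mtx);

Running that loop from four threads concurrently exercises the dirty
ecache and the decay machinery under contention.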
--- Makefile.in | 1 + test/unit/pa.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 test/unit/pa.c diff --git a/Makefile.in b/Makefile.in index c0929ce..6cded80 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,6 +219,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ diff --git a/test/unit/pa.c b/test/unit/pa.c new file mode 100644 index 0000000..f7b7290 --- /dev/null +++ b/test/unit/pa.c @@ -0,0 +1,117 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/pa.h" + +static void * +alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + void *ret = pages_map(new_addr, size, alignment, commit); + return ret; +} + +static bool +merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + return !maps_coalesce; +} + +static bool +split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + return !maps_coalesce; +} + +static void +init_test_extent_hooks(extent_hooks_t *hooks) { + /* + * The default hooks are mostly fine for testing. A few of them, + * though, access globals (alloc for dss setting in an arena, split and + * merge touch the global emap to find head state. The first of these + * can be fixed by keeping that state with the hooks, where it logically + * belongs. The second, though, we can only fix when we use the extent + * hook API. + */ + memcpy(hooks, &ehooks_default_extent_hooks, sizeof(extent_hooks_t)); + hooks->alloc = &alloc_hook; + hooks->merge = &merge_hook; + hooks->split = &split_hook; +} + +typedef struct test_data_s test_data_t; +struct test_data_s { + pa_shard_t shard; + base_t *base; + emap_t emap; + pa_shard_stats_t stats; + malloc_mutex_t stats_mtx; + extent_hooks_t hooks; +}; + +test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + test_data_t *test_data = calloc(1, sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + init_test_extent_hooks(&test_data->hooks); + + base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks); + assert_ptr_not_null(base, ""); + + test_data->base = base; + bool err = emap_init(&test_data->emap, test_data->base, + /* zeroed */ true); + assert_false(err, ""); + + nstime_t time; + nstime_init(&time, 0); + + err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, + test_data->base, /* ind */ 1, &test_data->stats, + &test_data->stats_mtx, &time, dirty_decay_ms, muzzy_decay_ms); + assert_false(err, ""); + + return test_data; +} + +void destroy_test_data(test_data_t *data) { + base_delete(TSDN_NULL, data->base); + free(data); +} + +static void * +do_alloc_free_purge(void *arg) { + test_data_t *test_data = (test_data_t *)arg; + for (int i = 0; i < 10 * 1000; i++) { + edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, + PAGE, /* slab */ false, /* szind */ 0, /* zero */ false); + assert_ptr_not_null(edata, ""); + bool generated_dirty; + pa_dalloc(TSDN_NULL, &test_data->shard, edata, + &generated_dirty); + malloc_mutex_lock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); + pa_decay_all(TSDN_NULL, &test_data->shard, + &test_data->shard.decay_dirty, + &test_data->stats.decay_dirty, + 
&test_data->shard.ecache_dirty, true); + malloc_mutex_unlock(TSDN_NULL, + &test_data->shard.decay_dirty.mtx); + } + return NULL; +} + +TEST_BEGIN(test_alloc_free_purge_thds) { + test_data_t *test_data = init_test_data(0, 0); + thd_t thds[4]; + for (int i = 0; i < 4; i++) { + thd_create(&thds[i], do_alloc_free_purge, test_data); + } + for (int i = 0; i < 4; i++) { + thd_join(thds[i], NULL); + } + +} +TEST_END + +int +main(void) { + return test( + test_alloc_free_purge_thds); +} -- cgit v0.12 From 79ae7f9211e367f0ecc8be24439af73bd3a4ebc4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Mar 2020 17:58:44 -0700 Subject: Rtree: Remove the per-field accessors. We instead split things into "edata" and "metadata". --- include/jemalloc/internal/emap.h | 48 ++++++---- include/jemalloc/internal/rtree.h | 183 +++++++++++++------------------------- src/emap.c | 31 ++++--- src/jemalloc.c | 4 +- test/unit/rtree.c | 75 ++++++++++------ 5 files changed, 163 insertions(+), 178 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b7eed84..9b92522 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -132,8 +132,7 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, - true); + return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } /* Fills in alloc_ctx with the info in the map. */ @@ -143,8 +142,10 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, - true, &alloc_ctx->szind, &alloc_ctx->slab); + rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + alloc_ctx->szind = metadata.szind; + alloc_ctx->slab = metadata.slab; } /* The pointer must be mapped. */ @@ -154,9 +155,11 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, true, &full_alloc_ctx->edata, - &full_alloc_ctx->szind, &full_alloc_ctx->slab); + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr); + full_alloc_ctx->edata = contents.edata; + full_alloc_ctx->szind = contents.metadata.szind; + full_alloc_ctx->slab = contents.metadata.slab; } /* @@ -170,24 +173,35 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, false, &full_alloc_ctx->edata, - &full_alloc_ctx->szind, &full_alloc_ctx->slab); + rtree_contents_t contents; + bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, &contents); + if (err) { + return true; + } + full_alloc_ctx->edata = contents.edata; + full_alloc_ctx->szind = contents.metadata.szind; + full_alloc_ctx->slab = contents.metadata.slab; + return false; } /* - * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the - * L1 rtree cache. - * - * Returns whether or not alloc_ctx was filled in. 
+ * Returns true on error. */ JEMALLOC_ALWAYS_INLINE bool emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, - rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); - return res; + + rtree_metadata_t metadata; + bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, + rtree_ctx, (uintptr_t)ptr, &metadata); + if (err) { + return true; + } + alloc_ctx->szind = metadata.szind; + alloc_ctx->slab = metadata.slab; + return false; } #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3b21f17..83dfdc8 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -257,19 +257,29 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, #endif } -static inline void -rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind, bool slab) { - assert(!slab || szind < SC_NBINS); - rtree_contents_t contents = rtree_leaf_elm_read( - tsdn, rtree, elm, /* dependent */ true); - /* - * The caller implicitly assures that it is the only writer to the szind - * and slab fields, and that the edata field cannot currently change. - */ - contents.metadata.slab = slab; - contents.metadata.szind = szind; - rtree_leaf_elm_write(tsdn, rtree, elm, contents); +/* + * Tries to look up the key in the L1 cache, returning it if there's a hit, or + * NULL if there's a miss. + * Key is allowed to be NULL; returns NULL in this case. + */ +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm; + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + elm = &leaf[subkey]; + return elm; + } else { + return NULL; + } } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -331,144 +341,79 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, dependent, init_missing); } +/* + * Returns true on lookup failure. 
+ */ static inline bool -rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - edata_t *edata, szind_t szind, bool slab) { +rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, rtree_contents_t *r_contents) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, false, true); + key, /* dependent */ false, /* init_missing */ false); if (elm == NULL) { return true; } - - rtree_contents_t contents; - contents.edata = edata; - contents.metadata.szind = szind; - contents.metadata.slab = slab; - rtree_leaf_elm_write(tsdn, rtree, elm, contents); - + *r_contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ false); return false; } -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - bool dependent) { +static inline rtree_contents_t +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, dependent, false); - if (!dependent && elm == NULL) { - return NULL; - } + key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); - return elm; -} - -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return NULL; - } - return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).edata; + return rtree_leaf_elm_read(tsdn, rtree, elm, /* dependent */ true); } -JEMALLOC_ALWAYS_INLINE szind_t -rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return SC_NSIZES; - } - return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).metadata.szind; +static inline rtree_metadata_t +rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + return rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).metadata; } /* - * rtree_slab_read() is intentionally omitted because slab is always read in - * conjunction with szind, which makes rtree_szind_slab_read() a better choice. + * Returns true on error. */ - -JEMALLOC_ALWAYS_INLINE bool -rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, edata_t **r_edata, - szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { +static inline bool +rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, rtree_metadata_t *r_rtree_metadata) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, + key); + if (elm == NULL) { return true; } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - dependent); - *r_edata = contents.edata; - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; - + *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).metadata; return false; } -/* - * Try to read szind_slab from the L1 cache. 
Returns true on a hit, - * and fills in r_szind and r_slab. Otherwise returns false. - * - * Key is allowed to be NULL in order to save an extra branch on the - * fastpath. returns false in this case. - */ -JEMALLOC_ALWAYS_INLINE bool -rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm; - - size_t slot = rtree_cache_direct_map(key); - uintptr_t leafkey = rtree_leafkey(key); - assert(leafkey != RTREE_LEAFKEY_INVALID); - - if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { - rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; - assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); - elm = &leaf[subkey]; - - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, - elm, /* dependent */ true); - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; +static inline bool +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + rtree_contents_t contents) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ false, /* init_missing */ true); + if (elm == NULL) { return true; - } else { - return false; } -} -JEMALLOC_ALWAYS_INLINE bool -rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return true; - } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true); - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; } static inline void -rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < SC_NBINS); - - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); -} - -static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); assert(rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ false).edata != NULL); + /* dependent */ true).edata != NULL); rtree_contents_t contents; contents.edata = NULL; contents.metadata.szind = SC_NSIZES; diff --git a/src/emap.c b/src/emap.c index f7fac01..637d332 100644 --- a/src/emap.c +++ b/src/emap.c @@ -171,9 +171,13 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, /* Register interior. 
*/ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = true; rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), edata, szind, true); + LG_PAGE), contents); } } @@ -200,13 +204,18 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, +void +emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_addr_get(edata), szind, slab); + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = slab; + rtree_write(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_addr_get(edata), contents); /* * Recall that this is called only for active->inactive and * inactive->active transitions (since only active extents have @@ -220,12 +229,12 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, * call is coming in those cases, though. */ if (slab && edata_size_get(edata) > PAGE) { - rtree_szind_slab_update(tsdn, - &emap->rtree, rtree_ctx, - (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE, - szind, slab); - } + uintptr_t key = (uintptr_t)edata_past_get(edata) + - (uintptr_t)PAGE; + rtree_write(tsdn, &emap->rtree, rtree_ctx, key, + contents); } + } } bool @@ -311,6 +320,6 @@ void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - assert(rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), true) == edata); + assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)).edata == edata); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0be5549..63ef578 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2782,11 +2782,11 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_ctx_try_lookup_fast(tsd, + bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !alloc_ctx.slab)) { + if (unlikely(err || !alloc_ctx.slab)) { return false; } assert(alloc_ctx.szind != SC_NSIZES); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index c116420..2802966 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -20,8 +20,9 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_data_init(&rtree_ctx); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, - false), "rtree_edata_read() should return NULL for empty tree"); + rtree_contents_t contents; + expect_true(rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, + &contents), "rtree_read_independent() should fail on empty rtree."); base_delete(tsdn, base); } @@ -50,21 +51,33 @@ TEST_BEGIN(test_rtree_extrema) { expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, - edata_szind_get(&edata_a), edata_slab_get(&edata_a)), + rtree_contents_t contents_a; + contents_a.edata = &edata_a; + contents_a.metadata.szind = edata_szind_get(&edata_a); + contents_a.metadata.slab = 
edata_slab_get(&edata_a); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); - rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, - edata_szind_get(&edata_a), edata_slab_get(&edata_a)); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), - &edata_a, - "rtree_edata_read() should return previously set value"); - + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), + "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_a = rtree_read(tsdn, rtree, &rtree_ctx, + PAGE); + expect_true(contents_a.edata == read_contents_a.edata + && contents_a.metadata.szind == read_contents_a.metadata.szind + && contents_a.metadata.slab == read_contents_a.metadata.slab, + "rtree_read() should return previously set value"); + + rtree_contents_t contents_b; + contents_b.edata = &edata_b; + contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); + contents_b.metadata.slab = edata_slab_get(&edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), - &edata_b, edata_szind_get_maybe_invalid(&edata_b), - edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0), true), &edata_b, - "rtree_edata_read() should return previously set value"); + contents_b), "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, + ~((uintptr_t)0)); + assert_true(contents_b.edata == read_contents_b.edata + && contents_b.metadata.szind == read_contents_b.metadata.szind + && contents_b.metadata.slab == read_contents_b.metadata.slab, + "rtree_read() should return previously set value"); base_delete(tsdn, base); } @@ -89,19 +102,23 @@ TEST_BEGIN(test_rtree_bits) { "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { + rtree_contents_t contents; + contents.edata = &edata; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - &edata, SC_NSIZES, false), - "Unexpected rtree_write() failure"); + contents), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[j], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[j]).edata, &edata, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - (((uintptr_t)2) << LG_PAGE), false), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + (((uintptr_t)2) << LG_PAGE)).edata, "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } @@ -142,26 +159,26 @@ TEST_BEGIN(test_rtree_random) { contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, &edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, &edata, "rtree_edata_read() should return 
previously set value, " "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, "rtree_edata_read() should return previously set value"); } -- cgit v0.12 From 877af247a87f6cb335a0f98aef62cd90afcfa520 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 11:38:33 -0700 Subject: QL, QR: Add documentation. --- include/jemalloc/internal/ql.h | 86 ++++++++++++++++++++++++++++++++++++++---- include/jemalloc/internal/qr.h | 86 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 162 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index db67219..c7f52f8 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -3,45 +3,85 @@ #include "jemalloc/internal/qr.h" +/* + * A linked-list implementation. + * + * This is built on top of the ring implementation, but that can be viewed as an + * implementation detail (i.e. trying to advance past the tail of the list + * doesn't wrap around). + * + * You define a struct like so: + * typedef strucy my_s my_t; + * struct my_s { + * int data; + * ql_elm(my_t) my_link; + * }; + * + * // We wobble between "list" and "head" for this type; we're now mostly + * // heading towards "list". + * typedef ql_head(my_t) my_list_t; + * + * You then pass a my_list_t * for a_head arguments, a my_t * for a_elm + * arguments, the token "my_link" for a_field arguments, and the token "my_t" + * for a_type arguments. + */ + /* List definitions. */ #define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } +/* Static initializer for an empty list. */ #define ql_head_initializer(a_head) {NULL} +/* The field definition. */ #define ql_elm(a_type) qr(a_type) -/* List functions. */ +/* A pointer to the first element in the list, or NULL if the list is empty. */ #define ql_first(a_head) ((a_head)->qlh_first) +/* Dynamically initializes a list. */ #define ql_new(a_head) do { \ ql_first(a_head) = NULL; \ } while (0) -#define ql_clear(a_head) ql_new(a_head) - +/* + * Sets dest to be the contents of src (overwriting any elements there), leaving + * src empty. + */ #define ql_move(a_head_dest, a_head_src) do { \ ql_first(a_head_dest) = ql_first(a_head_src); \ - ql_clear(a_head_src); \ + ql_new(a_head_src); \ } while (0) +/* True if the list is empty, otherwise false. */ #define ql_empty(a_head) (ql_first(a_head) == NULL) +/* + * Initializes a ql_elm. Must be called even if the field is about to be + * overwritten. + */ #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +/* + * Obtains the last item in the list. + */ #define ql_last(a_head, a_field) \ (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field)) +/* + * Gets a pointer to the next/prev element in the list. Trying to advance past + * the end or retreat before the beginning of the list returns NULL. + */ #define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) - #define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ : NULL) +/* Inserts a_elm before a_qlelm in the list. 
*/ #define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ @@ -49,9 +89,11 @@ struct { \ } \ } while (0) +/* Inserts a_elm after a_qlelm in the list. */ #define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) +/* Inserts a_elm as the first item in the list. */ #define ql_head_insert(a_head, a_elm, a_field) do { \ if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ @@ -59,6 +101,7 @@ struct { \ ql_first(a_head) = (a_elm); \ } while (0) +/* Inserts a_elm as the last item in the list. */ #define ql_tail_insert(a_head, a_elm, a_field) do { \ if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ @@ -66,16 +109,21 @@ struct { \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) +/* + * Given lists a = [a_1, ..., a_n] and [b_1, ..., b_n], results in: + * a = [a1, ..., a_n, b_1, ..., b_n] and b = []. + */ #define ql_concat(a_head_a, a_head_b, a_field) do { \ if (ql_empty(a_head_a)) { \ ql_move(a_head_a, a_head_b); \ } else if (!ql_empty(a_head_b)) { \ qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ a_field); \ - ql_clear(a_head_b); \ + ql_new(a_head_b); \ } \ } while (0) +/* Removes a_elm from the list. */ #define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ @@ -83,20 +131,29 @@ struct { \ if (ql_first(a_head) != (a_elm)) { \ qr_remove((a_elm), a_field); \ } else { \ - ql_clear(a_head); \ + ql_new(a_head); \ } \ } while (0) +/* Removes the first item in the list. */ #define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) +/* Removes the last item in the list. */ #define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) +/* + * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...], + * ql_split(a, a_n, b, some_field) results in + * a = [a_1, a_2, ..., a_n-1] + * and replaces b's contents with: + * b = [a_n, a_n+1, ...] + */ #define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ if (ql_first(a_head_a) == (a_elm)) { \ ql_move(a_head_b, a_head_a); \ @@ -116,6 +173,21 @@ struct { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } while (0) +/* + * Helper macro to iterate over each element in a list in order, starting from + * the head (or in reverse order, starting from the tail). The usage is + * (assuming my_t and my_list_t defined as above). + * + * int sum(my_list_t *list) { + * int sum = 0; + * my_t *iter; + * ql_foreach(iter, list, link) { + * sum += iter->data; + * } + * return sum; + * } + */ + #define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 559cbe4..ece4f55 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,6 +1,21 @@ #ifndef JEMALLOC_INTERNAL_QR_H #define JEMALLOC_INTERNAL_QR_H +/* + * A ring implementation based on an embedded circular doubly-linked list. + * + * You define your struct like so: + * + * typedef struct my_s my_t; + * struct my_s { + * int data; + * qr(my_t) my_link; + * }; + * + * And then pass a my_t * into macros for a_qr arguments, and the token + * "my_link" into a_field fields. + */ + /* Ring definitions. 
*/ #define qr(a_type) \ struct { \ @@ -8,17 +23,41 @@ struct { \ a_type *qre_prev; \ } -/* Ring functions. */ +/* + * Initialize a qr link. Every link must be initialized before being used, even + * if that initialization is going to be immediately overwritten (say, by being + * passed into an insertion macro). + */ #define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) +/* + * Go forwards or backwards in the ring. Note that (the ring being circular), this + * always succeeds -- you just keep looping around and around the ring if you + * chase pointers without end. + */ #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) - #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -/* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ +/* + * Given two rings: + * a -> a_1 -> ... -> a_n -- + * ^ | + * |------------------------ + * + * b -> b_1 -> ... -> b_n -- + * ^ | + * |------------------------ + * + * Results in the ring: + * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n -- + * ^ | + * |-------------------------------------------------| + * + * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. + */ #define qr_meld(a_qr_a, a_qr_b, a_field) do { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ (a_qr_a)->a_field.qre_prev; \ @@ -29,28 +68,69 @@ struct { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) +/* + * Logically, this is just a meld. The intent, though, is that a_qrelm is a + * single-element ring, so that "before" has a more obvious interpretation than + * meld. + */ #define qr_before_insert(a_qrelm, a_qr, a_field) \ qr_meld((a_qrelm), (a_qr), a_field) +/* Ditto, but inserting after rather than before. */ #define qr_after_insert(a_qrelm, a_qr, a_field) \ qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) /* + * Inverts meld; given the ring: + * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n -- + * ^ | + * |-------------------------------------------------| + * + * Results in two rings: + * a -> a_1 -> ... -> a_n -- + * ^ | + * |------------------------ + * + * b -> b_1 -> ... -> b_n -- + * ^ | + * |------------------------ + * * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. */ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) +/* + * Splits off a_qr from the rest of its ring, so that it becomes a + * single-element ring. + */ #define qr_remove(a_qr, a_field) \ qr_split(qr_next(a_qr, a_field), (a_qr), a_field) +/* + * Helper macro to iterate over each element in a ring exactly once, starting + * with a_qr. The usage is (assuming my_t defined as above): + * + * int sum(my_t *item) { + * int sum = 0; + * my_t *iter; + * qr_foreach(iter, item, link) { + * sum += iter->data; + * } + * return sum; + * } + */ #define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) +/* + * The same (and with the same usage) as qr_foreach, but in the opposite order, + * ending with a_qr. + */ #define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ -- cgit v0.12 From 3589571bfd4b1fda1d3771f96a08d7d14b7813bd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 16:09:02 -0700 Subject: SC: use SC_LG_NGROUP instead of its value. This magic constant introduces inconsistencies. 
We should be able to change its value solely by adjusting the definition in the header. --- src/sc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sc.c b/src/sc.c index 89ddb6b..cfce533 100644 --- a/src/sc.c +++ b/src/sc.c @@ -245,7 +245,7 @@ size_classes( assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); - /* + /* * In the allocation fastpath, we want to assume that we can * unconditionally subtract the requested allocation size from * a ssize_t, and detect passing through 0 correctly. This @@ -262,7 +262,7 @@ sc_data_init(sc_data_t *sc_data) { int lg_max_lookup = 12; size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, - lg_max_lookup, LG_PAGE, 2); + lg_max_lookup, LG_PAGE, SC_LG_NGROUP); sc_data->initialized = true; } -- cgit v0.12 From 58a00df2383fbe714da3b8a3697d68c4064d4b4a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 16:28:43 -0700 Subject: TSD: Put all fast-path data together. --- include/jemalloc/internal/tsd.h | 168 ++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index d88f3d1..7e08f6b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -15,57 +15,30 @@ /* * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats --- - * s: state - * m: thread_allocated - * k: thread_allocated_next_event_fast - * f: thread_deallocated - * h: thread_deallocated_next_event_fast - * c: rtree_ctx (rtree cache accessed on deallocation) - * t: tcache - * --- data not accessed on tcache fast path: arena-related fields --- - * e: tcache_enabled - * d: arenas_tdata_bypass - * r: reentrancy_level - * n: narenas_tdata - * l: thread_allocated_last_event - * j: thread_allocated_next_event - * q: thread_deallocated_last_event - * u: thread_deallocated_next_event - * g: tcache_gc_event_wait - * y: tcache_gc_dalloc_event_wait - * w: prof_sample_event_wait (config_prof) - * x: prof_sample_last_event (config_prof) - * z: stats_interval_event_wait - * e: stats_interval_last_event - * p: prof_tdata (config_prof) - * v: prng_state - * i: iarena - * a: arena - * o: arenas_tdata - * b: binshards - * Loading TSD data is on the critical path of basically all malloc operations. - * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. - * Use a compact layout to reduce cache footprint. - * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ - * |---------------------------- 1st cacheline ----------------------------| - * | sedrnnnn mmmmmmmm kkkkkkkk ffffffff hhhhhhhh [c * 24 ........ ........]| - * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ ........]| - * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 40 ........ ........ ........ .......] llllllll jjjjjjjj qqqqqqqq | - * +---------------------------- 4th cacheline ----------------------------+ - * | uuuuuuuu gggggggg yyyyyyyy wwwwwwww xxxxxxxx zzzzzzzz eeeeeeee pppppppp | - * +---------------------------- 5th and after ----------------------------+ - * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b * 40; then embedded tcache ..... 
| - * +-------------------------------------------------------------------------+ - * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * The elements after rtree_ctx and before tcache aren't really needed on tcache - * fast path. However we have a number of unused tcache bins and witnesses - * (never touched unless config_debug) at the end of tcache, so we place them - * there to avoid breaking the cachelines and possibly paging in an extra page. + * At least some thread-local data gets touched on the fast-path of almost all + * malloc operations. But much of it is only necessary down slow-paths, or + * testing. We want to colocate the fast-path data so that it can live on the + * same cacheline if possible. So we define three tiers of hotness: + * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. + * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; + * there are "semi-slow" paths like "not a sized deallocation, but can still + * live in the tcache". We'll want to keep these closer to the fast-path + * data. + * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. + * + * An additional concern is that the larger tcache bins won't be used (we have a + * bin per size class, but by default only cache relatively small objects). So + * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the + * TSD_DATA_SLOWER tier. + * + * As a result of all this, we put the slow data first, then the fast data, then + * the slower data, while keeping the tcache as the last element of the fast + * data (so that the fast -> slower transition happens midway through the + * tcache). While we don't yet play alignment tricks to guarantee it, this + * increases our odds of getting some cache/page locality on fast paths. */ + #ifdef JEMALLOC_JET typedef void (*test_callback_t)(int *); # define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 @@ -79,16 +52,11 @@ typedef void (*test_callback_t)(int *); #endif /* O(name, type, nullable type) */ -#define MALLOC_TSD \ +#define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -104,28 +72,13 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tcache, tcache_t, tcache_t) \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD - -/* - * TE_MIN_START_WAIT should not exceed the minimal allocation usize. 
- */ -#define TE_MIN_START_WAIT ((uint64_t)1U) -#define TE_MAX_START_WAIT UINT64_MAX + O(binshards, tsd_binshards_t, tsd_binshards_t) -#define TSD_INITIALIZER { \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ +#define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ /* arenas_tdata_bypass */ false, \ /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ /* thread_deallocated_last_event */ 0, \ @@ -141,10 +94,46 @@ typedef void (*test_callback_t)(int *); /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(tcache, tcache_t, tcache_t) + +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD + +#define TSD_DATA_SLOWER_INITIALIZER \ /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER + + +/* + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. + */ +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX + +#define TSD_INITIALIZER { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER \ + TSD_DATA_SLOWER_INITIALIZER \ } void *malloc_tsd_malloc(size_t size); @@ -235,14 +224,17 @@ struct tsd_s { * setters below. */ +#define O(n, t, nt) \ + t TSD_MANGLE(n); + + TSD_DATA_SLOW /* * We manually limit the state to just a single byte. Unless the 8-bit * atomics are unavailable (which is rare). */ tsd_state_t state; -#define O(n, t, nt) \ - t TSD_MANGLE(n); -MALLOC_TSD + TSD_DATA_FAST + TSD_DATA_SLOWER #undef O }; @@ -308,7 +300,9 @@ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get_unsafe(tsd_t *tsd) { \ return &tsd->TSD_MANGLE(n); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ @@ -327,7 +321,9 @@ tsd_##n##p_get(tsd_t *tsd) { \ state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* @@ -343,7 +339,9 @@ tsdn_##n##p_get(tsdn_t *tsdn) { \ tsd_t *tsd = tsdn_tsd(tsdn); \ return (nt *)tsd_##n##p_get(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. 
*/ @@ -352,7 +350,9 @@ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) { \ return *tsd_##n##p_get(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ @@ -363,7 +363,9 @@ tsd_##n##_set(tsd_t *tsd, t val) { \ tsd_state_get(tsd) != tsd_state_minimal_initialized); \ *tsd_##n##p_get(tsd) = val; \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O JEMALLOC_ALWAYS_INLINE void -- cgit v0.12 From 40e7aed59ea1ec8edbeabee71c288afdc2316d72 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 16:59:57 -0700 Subject: TSD: Move in some of the tcache fields. We had put these in the tcache for cache optimization reasons. After the previous diff, these no longer apply. --- include/jemalloc/internal/tcache_structs.h | 7 ------- include/jemalloc/internal/tsd.h | 10 ++++++++-- src/hook.c | 6 +++--- src/tsd.c | 10 +++++----- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 48dbf0f..783b2df 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,9 +7,6 @@ #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/tsd_types.h" -/* Various uses of this struct need it to be a named type. */ -typedef ql_elm(tsd_t) tsd_link_t; - struct tcache_s { /* * To minimize our cache-footprint, we put the frequently accessed data @@ -30,10 +27,6 @@ struct tcache_s { /* Lets us track all the tcaches in an arena. */ ql_elm(tcache_t) link; - /* Logically scoped to tsd, but put here for cache layout reasons. */ - ql_elm(tsd_t) tsd_link; - bool in_hook; - /* * The descriptor lets the arena find our cache bins without seeing the * tcache definition. 
This enables arenas to aggregate stats across diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 7e08f6b..66f2717 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -51,6 +51,8 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif +typedef ql_elm(tsd_t) tsd_link_t; + /* O(name, type, nullable type) */ #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ @@ -72,7 +74,9 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(binshards, tsd_binshards_t, tsd_binshards_t) + O(binshards, tsd_binshards_t, tsd_binshards_t)\ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -94,7 +98,9 @@ typedef void (*test_callback_t)(int *); /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, \ + /* in_hook */ false, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/hook.c b/src/hook.c index 9ac703c..493edbb 100644 --- a/src/hook.c +++ b/src/hook.c @@ -130,9 +130,9 @@ hook_reentrantp() { */ static bool in_hook_global = true; tsdn_t *tsdn = tsdn_fetch(); - tcache_t *tcache = tsdn_tcachep_get(tsdn); - if (tcache != NULL) { - return &tcache->in_hook; + bool *in_hook = tsdn_in_hookp_get(tsdn); + if (in_hook!= NULL) { + return in_hook; } return &in_hook_global; } diff --git a/src/tsd.c b/src/tsd.c index 38196c8..c07a4bf 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -74,7 +74,7 @@ tsd_in_nominal_list(tsd_t *tsd) { * out of it here. */ malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); - ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { if (tsd == tsd_list) { found = true; break; @@ -88,9 +88,9 @@ static void tsd_add_nominal(tsd_t *tsd) { assert(!tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); - ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link); + ql_elm_new(tsd, TSD_MANGLE(tsd_link)); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -99,7 +99,7 @@ tsd_remove_nominal(tsd_t *tsd) { assert(tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -112,7 +112,7 @@ tsd_force_recompute(tsdn_t *tsdn) { atomic_fence(ATOMIC_RELEASE); malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; - ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, -- cgit v0.12 From 7099c66205a9a435edcf1d2c6da56d6a11deb7d8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 17:49:50 -0700 Subject: Arena: fill in terms of cache_bins. 
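
arena_tcache_fill_small() used to reach into the tcache itself to size
the refill (via tcache_bin_info and lg_fill_div) and to flip
bin_refilled. With this change the arena only sees a cache_bin_t plus
its cache_bin_info_t, and the caller decides how much to fill. The
caller side, taken from the tcache.c hunk below, now reads:

    unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind])
        >> tcache->lg_fill_div[binind];
    arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind],
        binind, nfill);
    tcache->bin_refilled[binind] = true;

This should let the fill path be reused by any cache_bin owner rather
than being tied to the tcache layout.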
--- include/jemalloc/internal/arena_externs.h | 5 +++-- src/arena.c | 18 ++++++++---------- src/tcache.c | 6 +++++- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 6e0fe2b..40dad71 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -48,8 +48,9 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind); +void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, + const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); diff --git a/src/arena.c b/src/arena.c index b983b63..894c05f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -821,17 +821,15 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, } void -arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, &tcache_bin_info[binind]) == 0); - tcache->bin_refilled[binind] = true; +arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, + const unsigned nfill) { + assert(cache_bin_ncached_get(cache_bin, cache_bin_info) == 0); const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(tbin, &tcache_bin_info[binind], &ptrs, + cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs, nfill); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull @@ -915,10 +913,10 @@ label_refill: if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; - bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nrequests += cache_bin->tstats.nrequests; bin->stats.curregs += filled; bin->stats.nfills++; - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); @@ -944,7 +942,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(tbin, &tcache_bin_info[binind], &ptrs, filled); + cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index d345354..2063742 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -103,7 +103,11 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena != NULL); - arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); + unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) + >> tcache->lg_fill_div[binind]; + arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], + binind, nfill); + tcache->bin_refilled[binind] = true; ret = cache_bin_alloc(tbin, tcache_success); return ret; -- cgit v0.12 From a13fbad374f31a7e6e912c0260b442d134bb0f2e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 17:48:35 -0700 Subject: Tcache: split up fast and slow path data. 
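
The hot per-thread state stays in tcache_t; bookkeeping that is only
touched off the fast path (the arena association, the arena's tcache_ql
link, and similar fields) moves into a separate tcache_slow_t kept in
TSD, presumably in the slow-data tier introduced a few commits earlier.
Call sites that used to pass a single tcache_t * now carry both handles;
for example, the percpu_arena_update() hunk in
jemalloc_internal_inlines_b.h below becomes:

    tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
    tcache_t *tcache = tsd_tcachep_get(tsd);
    tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, newarena);

The exact field split is in the tcache_structs.h hunk.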
--- include/jemalloc/internal/arena_structs.h | 2 +- .../internal/jemalloc_internal_inlines_a.h | 11 +- .../internal/jemalloc_internal_inlines_b.h | 21 ++-- include/jemalloc/internal/tcache_externs.h | 11 +- include/jemalloc/internal/tcache_structs.h | 44 ++++--- include/jemalloc/internal/tcache_types.h | 2 + include/jemalloc/internal/tsd.h | 6 +- src/arena.c | 13 +- src/ctl.c | 3 +- src/jemalloc.c | 8 +- src/tcache.c | 135 ++++++++++++--------- src/thread_event.c | 3 +- 12 files changed, 156 insertions(+), 103 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 682450e..e8c3f81 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -53,7 +53,7 @@ struct arena_s { * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; + ql_head(tcache_slow_t) tcache_ql; ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; malloc_mutex_t tcache_ql_mtx; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index cc5e359..2e4c034 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -129,7 +129,7 @@ tcache_available(tsd_t *tsd) { */ if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ - assert(tsd_tcachep_get(tsd)->arena == NULL || + assert(tsd_tcache_slowp_get(tsd)->arena == NULL || !cache_bin_still_zero_initialized( tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); return true; @@ -147,6 +147,15 @@ tcache_get(tsd_t *tsd) { return tsd_tcachep_get(tsd); } +JEMALLOC_ALWAYS_INLINE tcache_slow_t * +tcache_slow_get(tsd_t *tsd) { + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcache_slowp_get(tsd); +} + static inline void pre_reentrancy(tsd_t *tsd, arena_t *arena) { /* arena is the current context. Reentry from a0 is not allowed. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 3a0bfc6..1de349e 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -19,8 +19,10 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { arena_migrate(tsd, oldind, newind); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, + tcache, newarena); } } } @@ -45,18 +47,19 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { ret = arena_choose_hard(tsd, internal); assert(ret); if (tcache_available(tsd)) { - tcache_t *tcache = tcache_get(tsd); - if (tcache->arena != NULL) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); + if (tcache_slow->arena != NULL) { /* See comments in tsd_tcache_data_init().*/ - assert(tcache->arena == + assert(tcache_slow->arena == arena_get(tsd_tsdn(tsd), 0, false)); - if (tcache->arena != ret) { + if (tcache_slow->arena != ret) { tcache_arena_reassociate(tsd_tsdn(tsd), - tcache, ret); + tcache_slow, tcache, ret); } } else { - tcache_arena_associate(tsd_tsdn(tsd), tcache, - ret); + tcache_arena_associate(tsd_tsdn(tsd), + tcache_slow, tcache, ret); } } } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index c5c8f48..21829ac 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,15 +26,17 @@ extern cache_bin_info_t *tcache_bin_info; extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, + tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); + void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *arena); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); @@ -42,7 +44,8 @@ bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 783b2df..5a27db7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,25 +7,19 @@ #include "jemalloc/internal/ticker.h" #include 
"jemalloc/internal/tsd_types.h" -struct tcache_s { - /* - * To minimize our cache-footprint, we put the frequently accessed data - * together at the start of this struct. - */ - - /* - * The pointer stacks associated with bins follow as a contiguous array. - * During tcache initialization, the avail pointer in each element of - * tbins is initialized to point to the proper offset within this array. - */ - cache_bin_t bins_small[SC_NBINS]; +/* + * The tcache state is split into the slow and hot path data. Each has a + * pointer to the other, and the data always comes in pairs. The layout of each + * of them varies in practice; tcache_slow lives in the TSD for the automatic + * tcache, and as part of a dynamic allocation for manual allocations. Keeping + * a pointer to tcache_slow lets us treat these cases uniformly, rather than + * splitting up the tcache [de]allocation code into those paths called with the + * TSD tcache and those called with a manual tcache. + */ - /* - * This data is less hot; we can be a little less careful with our - * footprint here. - */ +struct tcache_slow_s { /* Lets us track all the tcaches in an arena. */ - ql_elm(tcache_t) link; + ql_elm(tcache_slow_t) link; /* * The descriptor lets the arena find our cache bins without seeing the @@ -45,9 +39,23 @@ struct tcache_s { /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this - * tcache_t. + * tcache_slow_t and its associated tcache_t. */ void *dyn_alloc; + + /* The associated bins. */ + tcache_t *tcache; +}; + +struct tcache_s { + tcache_slow_t *tcache_slow; + /* + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. + */ + cache_bin_t bins_small[SC_NBINS]; + /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index c30a533..cba86f4 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/sc.h" +typedef struct tcache_slow_s tcache_slow_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; @@ -52,6 +53,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} +#define TCACHE_SLOW_ZERO_INITIALIZER {0} /* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 66f2717..37f5aa0 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -76,7 +76,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) + O(in_hook, bool, bool) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -100,7 +101,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* arenas_tdata */ NULL, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ - /* in_hook */ false, + /* in_hook */ false, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/arena.c b/src/arena.c index 894c05f..13b75ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1690,15 +1690,16 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { if (config_stats) { ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); - tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); - if (tcache != NULL && tcache->arena == arena) { - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); + tcache_slow_t *tcache_slow = tcache_slow_get(tsdn_tsd(tsdn)); + if (tcache_slow != NULL && tcache_slow->arena == arena) { + tcache_t *tcache = tcache_slow->tcache; + ql_elm_new(tcache_slow, link); + ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache->cache_bin_array_descriptor, + &tcache_slow->cache_bin_array_descriptor, tcache->bins_small, tcache->bins_large); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); + &tcache_slow->cache_bin_array_descriptor, link); } } diff --git a/src/ctl.c b/src/ctl.c index 7555267..ae17d44 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1864,7 +1864,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena_migrate(tsd, oldind, newind); if (tcache_available(tsd)) { tcache_arena_reassociate(tsd_tsdn(tsd), - tsd_tcachep_get(tsd), newarena); + tsd_tcache_slowp_get(tsd), tsd_tcachep_get(tsd), + newarena); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63ef578..c066680 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -717,11 +717,13 @@ stats_print_atexit(void) { for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { - tcache_t *tcache; + tcache_slow_t *tcache_slow; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tsdn, tcache, arena); + ql_foreach(tcache_slow, &arena->tcache_ql, + link) { + tcache_stats_merge(tsdn, + tcache_slow->tcache, arena); } malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); diff --git a/src/tcache.c b/src/tcache.c index 2063742..667a76a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -41,8 +41,8 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { } void -tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { - szind_t binind = tcache->next_gc_bin; +tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { + szind_t binind = tcache_slow->next_gc_bin; cache_bin_t *tbin; bool is_small; if (binind < SC_NBINS) { @@ -62,7 +62,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Flush (ceiling) 3/4 of the objects below 
the low water mark. */ if (is_small) { - assert(!tcache->bin_refilled[binind]); + assert(!tcache_slow->bin_refilled[binind]); tcache_bin_flush_small(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); /* @@ -71,43 +71,45 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { */ if ((cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> - (tcache->lg_fill_div[binind] + 1)) >= 1) { - tcache->lg_fill_div[binind]++; + (tcache_slow->lg_fill_div[binind] + 1)) >= 1) { + tcache_slow->lg_fill_div[binind]++; } } else { tcache_bin_flush_large(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); } - } else if (is_small && tcache->bin_refilled[binind]) { + } else if (is_small && tcache_slow->bin_refilled[binind]) { assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. */ - if (tcache->lg_fill_div[binind] > 1) { - tcache->lg_fill_div[binind]--; + if (tcache_slow->lg_fill_div[binind] > 1) { + tcache_slow->lg_fill_div[binind]--; } - tcache->bin_refilled[binind] = false; + tcache_slow->bin_refilled[binind] = false; } cache_bin_low_water_set(tbin); - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) { - tcache->next_gc_bin = 0; + tcache_slow->next_gc_bin++; + if (tcache_slow->next_gc_bin == nhbins) { + tcache_slow->next_gc_bin = 0; } } void * -tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, bool *tcache_success) { +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, + tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + bool *tcache_success) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; void *ret; - assert(tcache->arena != NULL); + assert(tcache_slow->arena != NULL); unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) - >> tcache->lg_fill_div[binind]; + >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], binind, nfill); - tcache->bin_refilled[binind] = true; + tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(tbin, tcache_success); return ret; @@ -154,6 +156,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem, bool small) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience * instead of calling tsd_tsdn(tsd) all the time. @@ -168,7 +171,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); - arena_t *tcache_arena = tcache->arena; + arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; @@ -361,57 +364,60 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } void -tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - assert(tcache->arena == NULL); - tcache->arena = arena; +tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena) { + assert(tcache_slow->arena == NULL); + tcache_slow->arena = arena; if (config_stats) { /* Link into list of extant tcaches. 
*/ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); + ql_elm_new(tcache_slow, link); + ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache->cache_bin_array_descriptor, tcache->bins_small, - tcache->bins_large); + &tcache_slow->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); + &tcache_slow->cache_bin_array_descriptor, link); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { - arena_t *arena = tcache->arena; +tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache) { + arena_t *arena = tcache_slow->arena; assert(arena != NULL); if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); if (config_debug) { bool in_ql = false; - tcache_t *iter; + tcache_slow_t *iter; ql_foreach(iter, &arena->tcache_ql, link) { - if (iter == tcache) { + if (iter == tcache_slow) { in_ql = true; break; } } assert(in_ql); } - ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->tcache_ql, tcache_slow, link); ql_remove(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); - tcache_stats_merge(tsdn, tcache, arena); + &tcache_slow->cache_bin_array_descriptor, link); + tcache_stats_merge(tsdn, tcache_slow->tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } - tcache->arena = NULL; + tcache_slow->arena = NULL; } void -tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - tcache_arena_dissociate(tsdn, tcache); - tcache_arena_associate(tsdn, tcache, arena); +tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena) { + tcache_arena_dissociate(tsdn, tcache_slow, tcache); + tcache_arena_associate(tsdn, tcache_slow, tcache, arena); } bool @@ -429,11 +435,15 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { } static void -tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { - memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); - tcache->next_gc_bin = 0; - tcache->arena = NULL; - tcache->dyn_alloc = mem; +tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + void *mem) { + tcache->tcache_slow = tcache_slow; + tcache_slow->tcache = tcache; + + memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t))); + tcache_slow->next_gc_bin = 0; + tcache_slow->arena = NULL; + tcache_slow->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); @@ -444,8 +454,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); for (; i < SC_NBINS; i++) { - tcache->lg_fill_div[i] = 1; - tcache->bin_refilled[i] = false; + tcache_slow->lg_fill_div[i] = 1; + tcache_slow->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); @@ -464,7 +474,9 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { /* Initialize auto tcache (embedded in TSD). 
*/ bool tsd_tcache_data_init(tsd_t *tsd) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); + assert(cache_bin_still_zero_initialized( tcache_small_bin_get(tcache, 0))); size_t alignment = tcache_bin_alloc_alignment; @@ -476,7 +488,7 @@ tsd_tcache_data_init(tsd_t *tsd) { return true; } - tcache_init(tsd, tcache, mem); + tcache_init(tsd, tcache_slow, tcache, mem); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -485,20 +497,22 @@ tsd_tcache_data_init(tsd_t *tsd) { * associate its tcache to a0 temporarily, and later on * arena_choose_hard() will re-associate properly. */ - tcache->arena = NULL; + tcache_slow->arena = NULL; arena_t *arena; if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, + arena); } else { arena = arena_choose(tsd, NULL); /* This may happen if thread.tcache.enabled is used. */ - if (tcache->arena == NULL) { - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + if (tcache_slow->arena == NULL) { + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, + tcache, arena); } } - assert(arena == tcache->arena); + assert(arena == tcache_slow->arena); return false; } @@ -511,7 +525,8 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. */ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t); + size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); size = sz_sa2u(size, tcache_bin_alloc_alignment); @@ -522,16 +537,20 @@ tcache_create_explicit(tsd_t *tsd) { return NULL; } tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); - tcache_init(tsd, tcache, mem); + tcache_slow_t *tcache_slow = + (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t)); + tcache_init(tsd, tcache_slow, tcache, mem); - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, + arena_ichoose(tsd, NULL)); return tcache; } static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { - assert(tcache->arena != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + assert(tcache_slow->arena != NULL); for (unsigned i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = tcache_small_bin_get(tcache, i); @@ -559,15 +578,17 @@ tcache_flush(tsd_t *tsd) { static void tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; tcache_flush_cache(tsd, tcache); - arena_t *arena = tcache->arena; - tcache_arena_dissociate(tsd_tsdn(tsd), tcache); + arena_t *arena = tcache_slow->arena; + tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache); if (tsd_tcache) { cache_bin_t *bin = tcache_small_bin_get(tcache, 0); cache_bin_assert_empty(bin, &tcache_bin_info[0]); } - idalloctm(tsd_tsdn(tsd), tcache->dyn_alloc, NULL, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, + true); /* * The deallocation and tcache flush above may not trigger decay since diff --git a/src/thread_event.c b/src/thread_event.c index 163ca3f..c96dea6 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -50,7 +50,8 @@ tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { - tcache_event_hard(tsd, tcache); + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_event_hard(tsd, tcache_slow, tcache); } } -- cgit v0.12 From cd29ebefd01be090a636e5560066d866209b141b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 20:04:46 -0700 Subject: Tcache: treat small and large cache bins uniformly --- include/jemalloc/internal/cache_bin.h | 8 +- .../internal/jemalloc_internal_inlines_a.h | 18 +-- include/jemalloc/internal/tcache_externs.h | 2 + include/jemalloc/internal/tcache_inlines.h | 15 +- include/jemalloc/internal/tcache_structs.h | 14 +- src/arena.c | 17 +-- src/jemalloc.c | 4 +- src/tcache.c | 154 +++++++++------------ 8 files changed, 87 insertions(+), 145 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 5a772bf..a56b4a1 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -106,16 +106,14 @@ struct cache_bin_array_descriptor_s { */ ql_elm(cache_bin_array_descriptor_t) link; /* Pointers to the tcache bins. */ - cache_bin_t *bins_small; - cache_bin_t *bins_large; + cache_bin_t *bins; }; static inline void cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins_small, cache_bin_t *bins_large) { + cache_bin_t *bins) { ql_elm_new(descriptor, link); - descriptor->bins_small = bins_small; - descriptor->bins_large = bins_large; + descriptor->bins = bins; } /* Returns ncached_max: Upper limit on ncached. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 2e4c034..25e5b50 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -108,18 +108,6 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE cache_bin_t * -tcache_small_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind < SC_NBINS); - return &tcache->bins_small[binind]; -} - -JEMALLOC_ALWAYS_INLINE cache_bin_t * -tcache_large_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind >= SC_NBINS &&binind < nhbins); - return &tcache->bins_large[binind - SC_NBINS]; -} - JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { /* @@ -129,9 +117,9 @@ tcache_available(tsd_t *tsd) { */ if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ - assert(tsd_tcache_slowp_get(tsd)->arena == NULL || - !cache_bin_still_zero_initialized( - tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); + if (config_debug && tsd_tcache_slowp_get(tsd)->arena != NULL) { + tcache_assert_initialized(tsd_tcachep_get(tsd)); + } return true; } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 21829ac..7ca38d6 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -53,4 +53,6 @@ void tcache_flush(tsd_t *tsd); bool tsd_tcache_data_init(tsd_t *tsd); bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_assert_initialized(tcache_t *tcache); + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 3b78ed2..4cbc2d2 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -30,12 +30,11 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < SC_NBINS); - bin = tcache_small_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -74,11 +73,10 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - cache_bin_t *bin; bool tcache_success; - assert(binind >= SC_NBINS &&binind < nhbins); - bin = tcache_large_bin_get(tcache, binind); + assert(binind >= SC_NBINS && binind < nhbins); + cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -120,12 +118,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - cache_bin_t *bin; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); - bin = tcache_small_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> 1; @@ -138,13 +134,12 @@ tcache_dalloc_small(tsd_t 
*tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - cache_bin_t *bin; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - bin = tcache_large_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> 1; diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 5a27db7..1c9d4db 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -49,19 +49,7 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - /* - * The pointer stacks associated with bins follow as a contiguous array. - * During tcache initialization, the avail pointer in each element of - * tbins is initialized to point to the proper offset within this array. - */ - cache_bin_t bins_small[SC_NBINS]; - - /* - * We put the cache bins for large size classes at the end of the - * struct, since some of them might not get used. This might end up - * letting us avoid touching an extra page if we don't have to. - */ - cache_bin_t bins_large[SC_NSIZES-SC_NBINS]; + cache_bin_t bins[SC_NSIZES]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/src/arena.c b/src/arena.c index 13b75ef..4ed3c88 100644 --- a/src/arena.c +++ b/src/arena.c @@ -148,18 +148,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - for (szind_t i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = &descriptor->bins_small[i]; + for (szind_t i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += - cache_bin_ncached_get(tbin, - &tcache_bin_info[i]) * sz_index2size(i); - } - for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { - cache_bin_t *tbin = &descriptor->bins_large[i]; - astats->tcache_bytes += - cache_bin_ncached_get(tbin, - &tcache_bin_info[i + SC_NBINS]) - * sz_index2size(i + SC_NBINS); + cache_bin_ncached_get(cache_bin, + &tcache_bin_info[i]) * sz_index2size(i); } } malloc_mutex_prof_read(tsdn, @@ -1697,7 +1690,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( &tcache_slow->cache_bin_array_descriptor, - tcache->bins_small, tcache->bins_large); + tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, &tcache_slow->cache_bin_array_descriptor, link); } diff --git a/src/jemalloc.c b/src/jemalloc.c index c066680..fab285d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2495,7 +2495,7 @@ je_malloc(size_t size) { assert(tsd_fast(tsd)); tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, ind); + cache_bin_t *bin = &tcache->bins[ind]; bool tcache_success; void *ret; @@ -2828,7 +2828,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; /* * If junking were enabled, this is where we would do it. 
It's not diff --git a/src/tcache.c b/src/tcache.c index 667a76a..63e1a4d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -43,19 +43,12 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; - cache_bin_t *tbin; - bool is_small; - if (binind < SC_NBINS) { - tbin = tcache_small_bin_get(tcache, binind); - is_small = true; - } else { - tbin = tcache_large_bin_get(tcache, binind); - is_small = false; - } + bool is_small = (binind < SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[binind]; - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[binind]); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, &tcache_bin_info[binind]); if (low_water > 0) { /* @@ -63,7 +56,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { */ if (is_small) { assert(!tcache_slow->bin_refilled[binind]); - tcache_bin_flush_small(tsd, tcache, tbin, binind, + tcache_bin_flush_small(tsd, tcache, cache_bin, binind, ncached - low_water + (low_water >> 2)); /* * Reduce fill count by 2X. Limit lg_fill_div such that @@ -75,7 +68,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { tcache_slow->lg_fill_div[binind]++; } } else { - tcache_bin_flush_large(tsd, tcache, tbin, binind, + tcache_bin_flush_large(tsd, tcache, cache_bin, binind, ncached - low_water + (low_water >> 2)); } } else if (is_small && tcache_slow->bin_refilled[binind]) { @@ -89,7 +82,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { } tcache_slow->bin_refilled[binind] = false; } - cache_bin_low_water_set(tbin); + cache_bin_low_water_set(cache_bin); tcache_slow->next_gc_bin++; if (tcache_slow->next_gc_bin == nhbins) { @@ -99,7 +92,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, - tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool *tcache_success) { tcache_slow_t *tcache_slow = tcache->tcache_slow; void *ret; @@ -107,10 +100,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) >> tcache_slow->lg_fill_div[binind]; - arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], - binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, + &tcache_bin_info[binind], binind, nfill); tcache_slow->bin_refilled[binind] = true; - ret = cache_bin_alloc(tbin, tcache_success); + ret = cache_bin_alloc(cache_bin, tcache_success); return ret; } @@ -154,7 +147,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* @@ -168,7 +161,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = 
tcache_slow->arena; @@ -182,7 +175,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind], + cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], &ptrs, nflush); /* Look up edata once per item. */ @@ -249,13 +242,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (small) { cur_bin->stats.nflushes++; cur_bin->stats.nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } else { arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; } } @@ -336,31 +329,31 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, binind, &binshard); bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; + bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; malloc_mutex_unlock(tsdn, &bin->lock); } else { arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; } } - cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs, + cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, ncached - rem); } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true); } void -tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false); } void @@ -376,8 +369,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, ql_elm_new(tcache_slow, link); ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache_slow->cache_bin_array_descriptor, - tcache->bins_small, tcache->bins_large); + &tcache_slow->cache_bin_array_descriptor, tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, &tcache_slow->cache_bin_array_descriptor, link); @@ -446,23 +438,18 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); - memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); - unsigned i = 0; size_t cur_offset = 0; cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); - for (; i < SC_NBINS; i++) { - tcache_slow->lg_fill_div[i] = 1; - tcache_slow->bin_refilled[i] = false; - cache_bin_t *bin = tcache_small_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], mem, - &cur_offset); - } - for (; i < nhbins; i++) { - cache_bin_t *bin = tcache_large_bin_get(tcache, i); - 
cache_bin_init(bin, &tcache_bin_info[i], mem, + for (unsigned i = 0; i < nhbins; i++) { + if (i < SC_NBINS) { + tcache_slow->lg_fill_div[i] = 1; + tcache_slow->bin_refilled[i] = false; + } + cache_bin_t *cache_bin = &tcache->bins[i]; + cache_bin_init(cache_bin, &tcache_bin_info[i], mem, &cur_offset); } cache_bin_postincrement(tcache_bin_info, nhbins, mem, @@ -477,8 +464,7 @@ tsd_tcache_data_init(tsd_t *tsd) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(cache_bin_still_zero_initialized(&tcache->bins[0])); size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); @@ -552,20 +538,15 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); - tcache_bin_flush_small(tsd, tcache, tbin, i, 0); - - if (config_stats) { - assert(tbin->tstats.nrequests == 0); + for (unsigned i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + if (i < SC_NBINS) { + tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); + } else { + tcache_bin_flush_large(tsd, tcache, cache_bin, i, 0); } - } - for (unsigned i = SC_NBINS; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tcache, tbin, i, 0); - if (config_stats) { - assert(tbin->tstats.nrequests == 0); + assert(cache_bin->tstats.nrequests == 0); } } } @@ -584,8 +565,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache); if (tsd_tcache) { - cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - cache_bin_assert_empty(bin, &tcache_bin_info[0]); + cache_bin_t *cache_bin = &tcache->bins[0]; + cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]); } idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, true); @@ -614,13 +595,11 @@ tcache_cleanup(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); - assert(cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(cache_bin_still_zero_initialized(&tcache->bins[0])); return; } assert(tsd_tcache_enabled_get(tsd)); - assert(!cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); tcache_destroy(tsd, tcache, true); if (config_debug) { @@ -628,33 +607,28 @@ tcache_cleanup(tsd_t *tsd) { * For debug testing only, we want to pretend we're still in the * zero-initialized state. */ - memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); - memset(tcache->bins_large, 0, - sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); } } void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - unsigned i; - cassert(config_stats); /* Merge and reset tcache stats. 
*/ - for (i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsdn, &bin->lock); - tbin->tstats.nrequests = 0; - } - - for (; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + for (unsigned i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + if (i < SC_NBINS) { + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, + &binshard); + bin->stats.nrequests += cache_bin->tstats.nrequests; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &arena->stats, i, cache_bin->tstats.nrequests); + } + cache_bin->tstats.nrequests = 0; } } @@ -824,3 +798,7 @@ void tcache_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } + +void tcache_assert_initialized(tcache_t *tcache) { + assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); +} -- cgit v0.12 From 4f8efba8248aaafa2200e3538bae126729e0407d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 15:02:38 -0700 Subject: TSD: Make rtree_ctx a slow-path field. Performance-sensitive users will use sized deallocation facilities, so that actually touching the rtree_ctx is unnecessary. We make it the last element of the slow data, so that it is for practical purposes almost-fast. --- include/jemalloc/internal/tsd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 37f5aa0..0f9ec12 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -77,7 +77,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -102,7 +103,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ @@ -110,7 +112,6 @@ typedef ql_elm(tsd_t) tsd_link_t; O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(tcache, tcache_t, tcache_t) #define TSD_DATA_FAST_INITIALIZER \ @@ -118,7 +119,6 @@ typedef ql_elm(tsd_t) tsd_link_t; /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, \ /* thread_deallocated_next_event_fast */ 0, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* tcache */ TCACHE_ZERO_INITIALIZER, /* O(name, type, nullable type) */ -- cgit v0.12 From fb6cfffd39ca50add3356c2e61242e13fff2ce1f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 13 Apr 2020 11:39:49 -0700 Subject: Configure: Get rid of LG_QUANTA. This is no longer used. 
--- configure.ac | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 1c2509a..f67fc3d 100644 --- a/configure.ac +++ b/configure.ac @@ -1552,9 +1552,7 @@ fi AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], - [Base 2 log of minimum allocation alignment])], - [LG_QUANTA="$with_lg_quantum"], - [LG_QUANTA="3 4"]) + [Base 2 log of minimum allocation alignment])]) if test "x$with_lg_quantum" != "x" ; then AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) fi -- cgit v0.12 From 79dd0c04ed88fcebe9f65905d65d6e7ae32c4940 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 14 Apr 2020 18:32:54 -0700 Subject: SC: Simplify SC_NPSIZES computation. Rather than taking all the sizes and subtracting out those that don't fit, we instead just add up all the ones that do. --- include/jemalloc/internal/sc.h | 48 +++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index a6341a3..6bc5db3 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -197,30 +197,34 @@ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) -/* The number of size classes that are a multiple of the page size. */ -#define SC_NPSIZES ( \ - /* Start with all the size classes. */ \ - SC_NSIZES \ - /* Subtract out those groups with too small a base. */ \ - - (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \ - /* And the pseudo-group. */ \ - - SC_NPSEUDO \ - /* And the tiny group. */ \ - - SC_NTINY \ - /* Sizes where ndelta*delta is not a multiple of the page size. */ \ - - (SC_LG_NGROUP * SC_NGROUP)) /* - * Note that the last line is computed as the sum of the second column in the - * following table: - * lg(base) | count of sizes to exclude - * ------------------------------|----------------------------- - * LG_PAGE - 1 | SC_NGROUP - 1 - * LG_PAGE | SC_NGROUP - 1 - * LG_PAGE + 1 | SC_NGROUP - 2 - * LG_PAGE + 2 | SC_NGROUP - 4 - * ... | ... - * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2) + * The number of size classes that are a multiple of the page size. + * + * Here are the first few bases that have a page-sized SC. + * + * lg(base) | base | highest SC | page-multiple SCs + * --------------|------------------------------------------ + * LG_PAGE - 1 | PAGE / 2 | PAGE | 1 + * LG_PAGE | PAGE | 2 * PAGE | 1 + * LG_PAGE + 1 | 2 * PAGE | 4 * PAGE | 2 + * LG_PAGE + 2 | 4 * PAGE | 8 * PAGE | 4 + * + * The number of page-multiple SCs continues to grow in powers of two, up until + * lg_delta == lg_page, which corresponds to setting lg_base to lg_page + + * SC_LG_NGROUP. So, then, the number of size classes that are multiples of the + * page size whose lg_delta is less than the page size are + * is 1 + (2**0 + 2**1 + ... + 2**(lg_ngroup - 1) == 2**lg_ngroup. + * + * For each base with lg_base in [lg_page + lg_ngroup, lg_base_max), there are + * NGROUP page-sized size classes, and when lg_base == lg_base_max, there are + * NGROUP - 1. + * + * This gives us the quantity we seek. */ +#define SC_NPSIZES ( \ + SC_NGROUP \ + + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ + + SC_NGROUP - 1) /* * We declare a size class is binnable if size < page size * group. Or, in other -- cgit v0.12 From 46471ea32760a90ac3b860f96805901c78a34f62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 15 Apr 2020 14:08:20 -0700 Subject: SC: Name the max lookup constant. 
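Previously the value 12 appeared twice: as the literal shift in
SC_LOOKUP_MAXCLASS and as a local lg_max_lookup in sc_data_init().
Naming the constant lets both derive from one definition. Condensed from
the hunks below (editorial illustration, not additional code):

    #define SC_LG_MAX_LOOKUP 12
    #define SC_LOOKUP_MAXCLASS ((size_t)1 << SC_LG_MAX_LOOKUP)   /* 4096 */

    /* sc.c passes the named constant instead of a local variable: */
    size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN,
        SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP);

With SC_LG_MAX_LOOKUP == 12, SC_LOOKUP_MAXCLASS evaluates to 4096 bytes,
so the lookup table described in the header comment keeps covering size
classes up to 4 KiB.
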
--- include/jemalloc/internal/sc.h | 5 +++-- src/sc.c | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 6bc5db3..138da5c 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -246,8 +246,9 @@ # error "Too many small size classes" #endif -/* The largest size class in the lookup table. */ -#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12) +/* The largest size class in the lookup table, and its binary log. */ +#define SC_LG_MAX_LOOKUP 12 +#define SC_LOOKUP_MAXCLASS ((size_t)1 << SC_LG_MAX_LOOKUP) /* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ #define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) diff --git a/src/sc.c b/src/sc.c index cfce533..1474eac 100644 --- a/src/sc.c +++ b/src/sc.c @@ -259,10 +259,8 @@ void sc_data_init(sc_data_t *sc_data) { assert(!sc_data->initialized); - int lg_max_lookup = 12; - size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, - lg_max_lookup, LG_PAGE, SC_LG_NGROUP); + SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP); sc_data->initialized = true; } -- cgit v0.12 From 2c09d43494d1c2f0df41ef16b040acb86ad4b095 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 28 Apr 2020 12:18:36 -0700 Subject: Add a benchmark of large allocations. --- Makefile.in | 1 + test/include/test/bench.h | 39 +++++++++++++++++++++++++++++++++++++++ test/stress/large_microbench.c | 33 +++++++++++++++++++++++++++++++++ test/stress/microbench.c | 41 +---------------------------------------- 4 files changed, 74 insertions(+), 40 deletions(-) create mode 100644 test/include/test/bench.h create mode 100644 test/stress/large_microbench.c diff --git a/Makefile.in b/Makefile.in index 6cded80..d35b74b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,6 +289,7 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ + $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c diff --git a/test/include/test/bench.h b/test/include/test/bench.h new file mode 100644 index 0000000..6cd19fd --- /dev/null +++ b/test/include/test/bench.h @@ -0,0 +1,39 @@ +static inline void +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, + void (*func)(void)) { + uint64_t i; + + for (i = 0; i < nwarmup; i++) { + func(); + } + timer_start(timer); + for (i = 0; i < niter; i++) { + func(); + } + timer_stop(timer); +} + +static inline void +compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, + void (*func_a), const char *name_b, void (*func_b)) { + timedelta_t timer_a, timer_b; + char ratio_buf[6]; + void *p; + + p = mallocx(1, 0); + if (p == NULL) { + test_fail("Unexpected mallocx() failure"); + return; + } + + time_func(&timer_a, nwarmup, niter, func_a); + time_func(&timer_b, nwarmup, niter, func_b); + + timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); + malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " + "%s=%"FMTu64"us, ratio=1:%s\n", + niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), + ratio_buf); + + dallocx(p, 0); +} diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c new file mode 100644 index 0000000..c66b33a --- /dev/null +++ b/test/stress/large_microbench.c @@ -0,0 +1,33 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +static void +large_mallocx_free(void) { + /* + * We go a bit larger than the large minclass on its own to better + * expose costs from things like zeroing. 
+ */ + void *p = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + free(p); +} + +static void +small_mallocx_free(void) { + void *p = mallocx(16, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + free(p); +} + +TEST_BEGIN(test_large_vs_small) { + compare_funcs(100*1000, 1*1000*1000, "large mallocx", + large_mallocx_free, "small mallocx", small_mallocx_free); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_large_vs_small); +} + diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 988b793..226677f 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,44 +1,5 @@ #include "test/jemalloc_test.h" - -static inline void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) { - uint64_t i; - - for (i = 0; i < nwarmup; i++) { - func(); - } - timer_start(timer); - for (i = 0; i < niter; i++) { - func(); - } - timer_stop(timer); -} - -void -compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, - void (*func_a), const char *name_b, void (*func_b)) { - timedelta_t timer_a, timer_b; - char ratio_buf[6]; - void *p; - - p = mallocx(1, 0); - if (p == NULL) { - test_fail("Unexpected mallocx() failure"); - return; - } - - time_func(&timer_a, nwarmup, niter, func_a); - time_func(&timer_b, nwarmup, niter, func_b); - - timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " - "%s=%"FMTu64"us, ratio=1:%s\n", - niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), - ratio_buf); - - dallocx(p, 0); -} +#include "test/bench.h" static void malloc_free(void) { -- cgit v0.12 From f1f8a75496cfff34d14bf067c4af92c63d9a521e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 29 Apr 2020 09:05:57 -0700 Subject: Let opt.zero propagate to core allocation. I.e. set dopts->zero early on if opt.zero is true, rather than leaving it set by the entry-point function (malloc, calloc, etc.) and then memsetting. This avoids situations where we zero once in the large-alloc pathway and then again via memset. --- src/jemalloc.c | 47 +++++++++++++++++++++++------------------------ src/large.c | 3 --- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fab285d..14b2a08 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2165,7 +2165,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* This is the beginning of the "core" algorithm. 
*/ - + if (config_fill && sopts->slow && opt_zero) { + dopts->zero = true; + } if (dopts->alignment == 0) { ind = sz_size2index(size); if (unlikely(ind >= SC_NSIZES)) { @@ -2263,12 +2265,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - if (config_fill && sopts->slow && !dopts->zero) { - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(allocation, usize); - } else if (unlikely(opt_zero)) { - memset(allocation, 0, usize); - } + if (config_fill && sopts->slow && !dopts->zero + && unlikely(opt_junk_alloc)) { + junk_alloc_callback(allocation, usize); } if (sopts->slow) { @@ -3210,7 +3209,6 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { size_t usize; size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - bool zero = flags & MALLOCX_ZERO; arena_t *arena; tcache_t *tcache; @@ -3220,6 +3218,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); + bool zero = flags & MALLOCX_ZERO; + if (config_fill && unlikely(opt_zero)) { + zero = true; + } + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arena_get(tsd_tsdn(tsd), arena_ind, true); @@ -3275,14 +3278,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - if (config_fill && malloc_slow && !zero && usize > old_usize) { + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize + && !zero) { size_t excess_len = usize - old_usize; void *excess_start = (void *)((uintptr_t)p + old_usize); - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(excess_start, excess_len); - } else if (unlikely(opt_zero)) { - memset(excess_start, 0, excess_len); - } + junk_alloc_callback(excess_start, excess_len); } return p; @@ -3497,7 +3497,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); + bool zero = flags & MALLOCX_ZERO; + if (config_fill && unlikely(opt_zero)) { + zero = true; + } LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); @@ -3561,16 +3565,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); - if (config_fill && malloc_slow) { - if (usize > old_usize && !zero) { - size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)ptr + old_usize); - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(excess_start, excess_len); - } else if (unlikely(opt_zero)) { - memset(excess_start, 0, excess_len); - } - } + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && + !zero) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)ptr + old_usize); + junk_alloc_callback(excess_start, excess_len); } label_not_resized: if (unlikely(!tsd_fast(tsd))) { diff --git a/src/large.c b/src/large.c index d97009a..b843937 100644 --- a/src/large.c +++ b/src/large.c @@ -32,9 +32,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return NULL; } - if (config_fill && unlikely(opt_zero)) { - zero = true; - } if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } -- cgit v0.12 From 0295aa38a2206f3229f60a4105767e15ebdca797 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 21 Apr 2020 
13:29:07 -0700 Subject: Deduplicate entries in witness error message --- src/witness.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/witness.c b/src/witness.c index f42b72a..e9ddf59 100644 --- a/src/witness.c +++ b/src/witness.c @@ -15,14 +15,41 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, } static void -witness_lock_error_impl(const witness_list_t *witnesses, - const witness_t *witness) { - witness_t *w; +witness_print_witness(witness_t *w, unsigned n) { + assert(n > 0); + if (n == 1) { + malloc_printf(" %s(%u)", w->name, w->rank); + } else { + malloc_printf(" %s(%u)X%u", w->name, w->rank, n); + } +} - malloc_printf(": Lock rank order reversal:"); +static void +witness_print_witnesses(const witness_list_t *witnesses) { + witness_t *w, *last = NULL; + unsigned n = 0; ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); + if (last != NULL && w->rank > last->rank) { + assert(w->name != last->name); + witness_print_witness(last, n); + n = 0; + } else if (last != NULL) { + assert(w->rank == last->rank); + assert(w->name == last->name); + } + last = w; + ++n; } + if (last != NULL) { + witness_print_witness(last, n); + } +} + +static void +witness_lock_error_impl(const witness_list_t *witnesses, + const witness_t *witness) { + malloc_printf(": Lock rank order reversal:"); + witness_print_witnesses(witnesses); malloc_printf(" %s(%u)\n", witness->name, witness->rank); abort(); } @@ -49,13 +76,9 @@ witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error = static void witness_depth_error_impl(const witness_list_t *witnesses, witness_rank_t rank_inclusive, unsigned depth) { - witness_t *w; - malloc_printf(": Should own %u lock%s of rank >= %u:", depth, (depth != 1) ? "s" : "", rank_inclusive); - ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); - } + witness_print_witnesses(witnesses); malloc_printf("\n"); abort(); } -- cgit v0.12 From 039bfd4e307df51bd46f164b2af0ffa62142ca5d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 11:08:25 -0700 Subject: Do not rollback prof idump counter in arena_prof_promote() --- include/jemalloc/internal/counter.h | 25 ----------------- include/jemalloc/internal/prof_externs.h | 1 - include/jemalloc/internal/prof_inlines_a.h | 11 -------- src/arena.c | 2 -- src/prof.c | 12 +-------- test/unit/counter.c | 43 ------------------------------ 6 files changed, 1 insertion(+), 93 deletions(-) diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 4aee23d..896fd02 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -51,31 +51,6 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { return overflow; } -JEMALLOC_ALWAYS_INLINE void -counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { - /* - * Cancel out as much of the excessive accumbytes increase as possible - * without underflowing. Interval-triggered events occur slightly more - * often than intended as a result of incomplete canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&counter->accumbytes, - ATOMIC_RELAXED); - do { - a1 = (a0 >= bytes) ? a0 - bytes : 0; - } while (!atomic_compare_exchange_weak_u64( - &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &counter->mtx); - a0 = counter->accumbytes; - a1 = (a0 >= bytes) ? 
a0 - bytes : 0; - counter->accumbytes = a1; - malloc_mutex_unlock(tsdn, &counter->mtx); -#endif -} - bool counter_accum_init(counter_accum_t *counter, uint64_t interval); #endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 3518167..f03ef74 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -50,7 +50,6 @@ extern bool prof_booted; /* Functions only accessed in prof_inlines_a.h */ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); -void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); /* Functions only accessed in prof_inlines_b.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 61773a2..63d429e 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -36,15 +36,4 @@ prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { return prof_idump_accum_impl(tsdn, accumbytes); } -JEMALLOC_ALWAYS_INLINE void -prof_idump_rollback(tsdn_t *tsdn, size_t usize) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return; - } - - prof_idump_rollback_impl(tsdn, usize); -} - #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/src/arena.c b/src/arena.c index 4ed3c88..12c6b0a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1061,8 +1061,6 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { edata_szind_set(edata, szind); emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - prof_idump_rollback(tsdn, usize); - assert(isalloc(tsdn, ptr) == usize); } diff --git a/src/prof.c b/src/prof.c index bbf8e9d..9c1fc2a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -50,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_experimental_use_sys_thread_name = false; -/* Accessed via prof_idump_[accum/rollback](). */ +/* Accessed via prof_idump_accum(). */ static counter_accum_t prof_idump_accumulated; /* @@ -655,16 +655,6 @@ prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } -void -prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { - cassert(config_prof); - - /* Rollback is only done on arena_prof_promote of small sizes. 
*/ - assert(SC_LARGE_MINCLASS > usize); - return counter_rollback(tsdn, &prof_idump_accumulated, - SC_LARGE_MINCLASS - usize); -} - bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); diff --git a/test/unit/counter.c b/test/unit/counter.c index 585cbc6..c14eee3 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -36,48 +36,6 @@ expect_counter_value(counter_accum_t *c, uint64_t v) { expect_u64_eq(accum, v, "Counter value mismatch"); } -TEST_BEGIN(test_counter_rollback) { - uint64_t half_interval = interval / 2; - - counter_accum_t c; - counter_accum_init(&c, interval); - - tsd_t *tsd = tsd_fetch(); - counter_rollback(tsd_tsdn(tsd), &c, half_interval); - - bool trigger; - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); - expect_counter_value(&c, 1); - - counter_rollback(tsd_tsdn(tsd), &c, 1); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, 1); - expect_counter_value(&c, half_interval - 1); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - expect_counter_value(&c, interval - 1); - - trigger = counter_accum(tsd_tsdn(tsd), &c, 1); - expect_b_eq(trigger, true, "Should have triggered"); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); - expect_b_eq(trigger, true, "Should have triggered"); - expect_counter_value(&c, 1); -} -TEST_END - #define N_THDS (16) #define N_ITER_THD (1 << 12) #define ITER_INCREMENT (interval >> 4) @@ -123,6 +81,5 @@ int main(void) { return test( test_counter_accum, - test_counter_rollback, test_counter_mt); } -- cgit v0.12 From e10e5059e87b8d9c6ec9910d803bd1a1ba55da85 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:13:22 -0700 Subject: Make prof_idump_accum() non-inline --- include/jemalloc/internal/prof_externs.h | 4 +--- include/jemalloc/internal/prof_inlines_a.h | 11 ----------- src/prof.c | 6 +++++- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index f03ef74..8c657c6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,9 +48,6 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* Functions only accessed in prof_inlines_a.h */ -bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); - /* Functions only accessed in prof_inlines_b.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); @@ -78,6 +75,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_accum_init(void); +bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 63d429e..4450b1d 100644 --- 
a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -25,15 +25,4 @@ prof_active_get_unlocked(void) { return prof_active; } -JEMALLOC_ALWAYS_INLINE bool -prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return false; - } - - return prof_idump_accum_impl(tsdn, accumbytes); -} - #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/src/prof.c b/src/prof.c index 9c1fc2a..ff09a5d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -649,9 +649,13 @@ prof_accum_init(void) { } bool -prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { +prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { cassert(config_prof); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return false; + } + return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } -- cgit v0.12 From 8be558449446a5190bdf661da428ecd6b9fb2a8f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:21:38 -0700 Subject: Initialize prof idump counter once rather than once per arena --- include/jemalloc/internal/prof_externs.h | 1 - src/arena.c | 6 ------ src/prof.c | 8 ++++++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 8c657c6..0fbd3ea 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -74,7 +74,6 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(void); bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); diff --git a/src/arena.c b/src/arena.c index 12c6b0a..b61d373 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1463,12 +1463,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - if (config_prof) { - if (prof_accum_init()) { - goto label_error; - } - } - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); diff --git a/src/prof.c b/src/prof.c index ff09a5d..cb71850 100644 --- a/src/prof.c +++ b/src/prof.c @@ -641,8 +641,8 @@ prof_fdump(void) { prof_dump(tsd, false, filename, opt_prof_leak); } -bool -prof_accum_init(void) { +static bool +prof_idump_accum_init(void) { cassert(config_prof); return counter_accum_init(&prof_idump_accumulated, prof_interval); @@ -1021,6 +1021,10 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } + if (prof_idump_accum_init()) { + return true; + } + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; -- cgit v0.12 From d454af90f102c99eddb38909fc7822769c4213aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:39:05 -0700 Subject: Remove unused prof_accum field from arena --- include/jemalloc/internal/arena_structs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e8c3f81..0c3f42f 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -57,9 +57,6 @@ struct arena_s { ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; malloc_mutex_t tcache_ql_mtx; - /* Synchronization: internal. 
*/ - counter_accum_t prof_accum; - /* * Represents a dss_prec_t, but atomically. * -- cgit v0.12 From e6cb6919c0c1c94e387ccec79190647a44eb7180 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 28 Apr 2020 09:59:37 -0700 Subject: Consolidate prof inline function headers The prof inline functions are no longer involved in a circular dependency, so consolidate the two headers into one. --- .../jemalloc/internal/jemalloc_internal_includes.h | 3 +- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines.h | 247 +++++++++++++++++++++ include/jemalloc/internal/prof_inlines_a.h | 28 --- include/jemalloc/internal/prof_inlines_b.h | 225 ------------------- 5 files changed, 249 insertions(+), 256 deletions(-) create mode 100644 include/jemalloc/internal/prof_inlines.h delete mode 100644 include/jemalloc/internal/prof_inlines_a.h delete mode 100644 include/jemalloc/internal/prof_inlines_b.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 72b5a72..90a12a1 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -73,13 +73,12 @@ * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. */ -#include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/background_thread_inlines.h" #endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 0fbd3ea..cf61fea 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,7 +48,7 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* Functions only accessed in prof_inlines_b.h */ +/* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h new file mode 100644 index 0000000..d8f401d --- /dev/null +++ b/include/jemalloc/internal/prof_inlines.h @@ -0,0 +1,247 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H + +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" + +JEMALLOC_ALWAYS_INLINE void +prof_active_assert() { + cassert(config_prof); + /* + * If opt_prof is off, then prof_active must always be off, regardless + * of whether prof_active_mtx is in effect or not. + */ + assert(opt_prof || !prof_active); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + prof_active_assert(); + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. 
+ */ + return prof_active; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) { + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return prof_gdump_val; +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) { + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + assert(tsd_reentrancy_level_get(tsd) == 0); + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return tdata; +} + +JEMALLOC_ALWAYS_INLINE void +prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { + cassert(config_prof); + assert(ptr != NULL); + assert(prof_info != NULL); + + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false); +} + +JEMALLOC_ALWAYS_INLINE void +prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, + emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { + cassert(config_prof); + assert(ptr != NULL); + assert(prof_info != NULL); + + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(tsd, ptr, alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset_sampled(tsd, ptr); +} + +JEMALLOC_ALWAYS_INLINE void +prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { + cassert(config_prof); + assert(edata != NULL); + assert((uintptr_t)tctx > (uintptr_t)1U); + + arena_prof_info_set(tsd, edata, tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_should_skip(tsd_t *tsd, bool sample_event) { + cassert(config_prof); + + /* Fastpath: no need to load tdata */ + if (likely(!sample_event)) { + return true; + } + + if (tsd_reentrancy_level_get(tsd) > 0) { + return true; + } + + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (unlikely(tdata == NULL)) { + return true; + } + + return !tdata->active; +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { + prof_tctx_t *ret; + + if (!prof_active || + likely(prof_sample_should_skip(tsd, sample_event))) { + ret = (prof_tctx_t *)(uintptr_t)1U; + } else { + ret = prof_tctx_create(tsd); + } + + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, + emap_alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + prof_malloc_sample_object(tsd, ptr, size, usize, tctx); + } else { + prof_tctx_reset(tsd, ptr, alloc_ctx); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, + prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, + prof_info_t *old_prof_info, bool sample_event) { + bool sampled, old_sampled, moved; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && ptr 
!= NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + if (prof_sample_should_skip(tsd, sample_event)) { + /* + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd, ptr, size, usize, tctx); + } else if (moved) { + prof_tctx_reset(tsd, ptr, NULL); + } else if (unlikely(old_sampled)) { + /* + * prof_tctx_reset() would work for the !moved case as well, + * but prof_tctx_reset_sampled() is slightly cheaper, and the + * proper thing to do here in the presence of explicit + * knowledge re: moved state. + */ + prof_tctx_reset_sampled(tsd, ptr); + } else { + prof_info_t prof_info; + prof_info_get(tsd, ptr, NULL, &prof_info); + assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); + } + + /* + * The prof_free_sampled_object() call must come after the + * prof_malloc_sample_object() call, because tctx and old_tctx may be + * the same, in which case reversing the call order could cause the tctx + * to be prematurely destroyed as a side effect of momentarily zeroed + * counters. + */ + if (unlikely(old_sampled)) { + prof_free_sampled_object(tsd, old_usize, old_prof_info); + } +} + +JEMALLOC_ALWAYS_INLINE size_t +prof_sample_align(size_t orig_align) { + /* + * Enforce page alignment, so that sampled allocations can be identified + * w/o metadata lookup. + */ + assert(opt_prof); + return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + orig_align; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const void *ptr, size_t usize, + emap_alloc_ctx_t *alloc_ctx) { + prof_info_t prof_info; + prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + + if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { + assert(prof_sample_aligned(ptr)); + prof_free_sampled_object(tsd, usize, &prof_info); + } +} + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h deleted file mode 100644 index 4450b1d..0000000 --- a/include/jemalloc/internal/prof_inlines_a.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H -#define JEMALLOC_INTERNAL_PROF_INLINES_A_H - -#include "jemalloc/internal/mutex.h" - -JEMALLOC_ALWAYS_INLINE void -prof_active_assert() { - cassert(config_prof); - /* - * If opt_prof is off, then prof_active must always be off, regardless - * of whether prof_active_mtx is in effect or not. - */ - assert(opt_prof || !prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) { - prof_active_assert(); - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. 
- */ - return prof_active; -} - -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h deleted file mode 100644 index 29d4020..0000000 --- a/include/jemalloc/internal/prof_inlines_b.h +++ /dev/null @@ -1,225 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H -#define JEMALLOC_INTERNAL_PROF_INLINES_B_H - -#include "jemalloc/internal/safety_check.h" -#include "jemalloc/internal/sz.h" -#include "jemalloc/internal/thread_event.h" - -JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) { - /* - * No locking is used when reading prof_gdump_val in the fast path, so - * there are no guarantees regarding how long it will take for all - * threads to notice state changes. - */ - return prof_gdump_val; -} - -JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) { - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = tsd_prof_tdata_get(tsd); - if (create) { - assert(tsd_reentrancy_level_get(tsd) == 0); - if (unlikely(tdata == NULL)) { - if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); - } - } else if (unlikely(tdata->expired)) { - tdata = prof_tdata_reinit(tsd, tdata); - tsd_prof_tdata_set(tsd, tdata); - } - assert(tdata == NULL || tdata->attached); - } - - return tdata; -} - -JEMALLOC_ALWAYS_INLINE void -prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, - prof_info_t *prof_info) { - cassert(config_prof); - assert(ptr != NULL); - assert(prof_info != NULL); - - arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false); -} - -JEMALLOC_ALWAYS_INLINE void -prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, - emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { - cassert(config_prof); - assert(ptr != NULL); - assert(prof_info != NULL); - - arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset(tsd, ptr, alloc_ctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset_sampled(tsd, ptr); -} - -JEMALLOC_ALWAYS_INLINE void -prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { - cassert(config_prof); - assert(edata != NULL); - assert((uintptr_t)tctx > (uintptr_t)1U); - - arena_prof_info_set(tsd, edata, tctx); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_should_skip(tsd_t *tsd, bool sample_event) { - cassert(config_prof); - - /* Fastpath: no need to load tdata */ - if (likely(!sample_event)) { - return true; - } - - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } - - prof_tdata_t *tdata = prof_tdata_get(tsd, true); - if (unlikely(tdata == NULL)) { - return true; - } - - return !tdata->active; -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { - prof_tctx_t *ret; - - if (!prof_active || - likely(prof_sample_should_skip(tsd, sample_event))) { - ret = (prof_tctx_t *)(uintptr_t)1U; - } else { - ret = prof_tctx_create(tsd); - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, - emap_alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { - cassert(config_prof); - assert(ptr != NULL); - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) 
{ - prof_malloc_sample_object(tsd, ptr, size, usize, tctx); - } else { - prof_tctx_reset(tsd, ptr, alloc_ctx); - } -} - -JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, - prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, - prof_info_t *old_prof_info, bool sample_event) { - bool sampled, old_sampled, moved; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - - if (prof_active && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_should_skip(tsd, sample_event)) { - /* - * Don't sample. The usize passed to prof_alloc_prep() - * was larger than what actually got allocated, so a - * backtrace was captured for this allocation, even - * though its actual usize was insufficient to cross the - * sample threshold. - */ - prof_alloc_rollback(tsd, tctx); - tctx = (prof_tctx_t *)(uintptr_t)1U; - } - } - - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); - moved = (ptr != old_ptr); - - if (unlikely(sampled)) { - prof_malloc_sample_object(tsd, ptr, size, usize, tctx); - } else if (moved) { - prof_tctx_reset(tsd, ptr, NULL); - } else if (unlikely(old_sampled)) { - /* - * prof_tctx_reset() would work for the !moved case as well, - * but prof_tctx_reset_sampled() is slightly cheaper, and the - * proper thing to do here in the presence of explicit - * knowledge re: moved state. - */ - prof_tctx_reset_sampled(tsd, ptr); - } else { - prof_info_t prof_info; - prof_info_get(tsd, ptr, NULL, &prof_info); - assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); - } - - /* - * The prof_free_sampled_object() call must come after the - * prof_malloc_sample_object() call, because tctx and old_tctx may be - * the same, in which case reversing the call order could cause the tctx - * to be prematurely destroyed as a side effect of momentarily zeroed - * counters. - */ - if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_usize, old_prof_info); - } -} - -JEMALLOC_ALWAYS_INLINE size_t -prof_sample_align(size_t orig_align) { - /* - * Enforce page alignment, so that sampled allocations can be identified - * w/o metadata lookup. - */ - assert(opt_prof); - return (config_cache_oblivious && orig_align < PAGE) ? PAGE : - orig_align; -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; -} - -JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize, - emap_alloc_ctx_t *alloc_ctx) { - prof_info_t prof_info; - prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); - - cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - - if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { - assert(prof_sample_aligned(ptr)); - prof_free_sampled_object(tsd, usize, &prof_info); - } -} - -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ -- cgit v0.12 From fef9abdcc07227e9e9cb479c4799707c4efa86ad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 28 Apr 2020 10:40:46 -0700 Subject: Cleanup tcache allocation logic The logic in tcache allocation no longer involves profiling or filling. 
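
As a minimal illustrative sketch (not part of the patch) of the pattern this change leaves behind: the usable size is now derived from the bin index only on the zeroing path, instead of being precomputed for profiling/fill logic that no longer lives in the tcache fast path. All names below are invented for the sketch and are not jemalloc internals; the toy size-class lookup merely stands in for the sz_index2size() call seen in the diff below.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for a size-class lookup: 16-byte size classes. */
static size_t
sketch_index2size(size_t binind) {
	return (binind + 1) * 16;
}

/* Toy fast-path allocation from a cached slot. */
static void *
sketch_bin_alloc(void *cached_slot, size_t binind, bool zero) {
	if (cached_slot == NULL) {
		/* Cache miss; a real allocator would fall back to a slow path. */
		return NULL;
	}
	if (zero) {
		/* usize is computed only when zeroing actually needs it. */
		size_t usize = sketch_index2size(binind);
		memset(cached_slot, 0, usize);
	}
	return cached_slot;
}

int
main(void) {
	char slot[32];
	void *p = sketch_bin_alloc(slot, 1, true);
	printf("zeroed %p, first byte %d\n", p, (int)((char *)p)[0]);
	return 0;
}
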
--- include/jemalloc/internal/tcache_inlines.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4cbc2d2..5d49c4e 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -31,7 +31,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < SC_NBINS); cache_bin_t *bin = &tcache->bins[binind]; @@ -52,15 +51,9 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, } assert(ret); - /* - * Only compute usize if required. The checks in the following if - * statement are all static. - */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = sz_index2size(binind); - assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); - } if (unlikely(zero)) { + size_t usize = sz_index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); memset(ret, 0, usize); } if (config_stats) { @@ -94,16 +87,9 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, return NULL; } } else { - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || - unlikely(zero)) { - usize = sz_index2size(binind); - assert(usize <= tcache_maxclass); - } - if (unlikely(zero)) { + size_t usize = sz_index2size(binind); + assert(usize <= tcache_maxclass); memset(ret, 0, usize); } -- cgit v0.12 From 2097e1945b262f079d82bf6ef78330bf03ebdf08 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 14:49:20 -0700 Subject: Unify write callback signature --- include/jemalloc/internal/buf_writer.h | 2 -- include/jemalloc/internal/emitter.h | 4 ++-- include/jemalloc/internal/jemalloc_internal_types.h | 3 +++ include/jemalloc/internal/malloc_io.h | 12 +++++++----- include/jemalloc/internal/prof_externs.h | 3 +-- include/jemalloc/internal/stats.h | 3 +-- src/ctl.c | 2 +- src/malloc_io.c | 7 +++---- src/prof_recent.c | 3 +-- src/stats.c | 3 +-- 10 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 55b18ab..37aa6de 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -10,8 +10,6 @@ * some "option like" content for the write_cb, so it doesn't matter. */ -typedef void (write_cb_t)(void *, const char *); - typedef struct { write_cb_t *write_cb; void *cbopaque; diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index c3f47b2..9482f68 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -68,7 +68,7 @@ typedef struct emitter_s emitter_t; struct emitter_s { emitter_output_t output; /* The output information. */ - void (*write_cb)(void *, const char *); + write_cb_t *write_cb; void *cbopaque; int nesting_depth; /* True if we've already emitted a value at the given depth. 
*/ @@ -240,7 +240,7 @@ emitter_json_key_prefix(emitter_t *emitter) { static inline void emitter_init(emitter_t *emitter, emitter_output_t emitter_output, - void (*write_cb)(void *, const char *), void *cbopaque) { + write_cb_t *write_cb, void *cbopaque) { emitter->output = emitter_output; emitter->write_cb = write_cb; emitter->cbopaque = cbopaque; diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index d8da4de..1ce0f3a 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -17,6 +17,9 @@ enum zero_realloc_action_e { }; typedef enum zero_realloc_action_e zero_realloc_action_t; +/* Signature of write callback. */ +typedef void (write_cb_t)(void *, const char *); + /* * Flags bits: * diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index fac6361..a375bda 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MALLOC_IO_H #define JEMALLOC_INTERNAL_MALLOC_IO_H +#include "jemalloc/internal/jemalloc_internal_types.h" + #ifdef _WIN32 # ifdef _WIN64 # define FMT64_PREFIX "ll" @@ -40,7 +42,7 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 -void wrtmessage(void *cbopaque, const char *s); +write_cb_t wrtmessage; int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base); @@ -58,10 +60,10 @@ size_t malloc_snprintf(char *str, size_t size, const char *format, ...) * The caller can set write_cb to null to choose to print with the * je_malloc_message hook. */ -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + va_list ap); +void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); static inline ssize_t diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index cf61fea..a6b659c 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -106,7 +106,6 @@ bool prof_log_stop(tsdn_t *tsdn); ssize_t prof_recent_alloc_max_ctl_read(); ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); -void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), - void *cbopaque); +void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index d1f5eab..3720619 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -41,8 +41,7 @@ uint64_t stats_interval_accum_batch_size(void); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. 
*/ -void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts); +void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); bool stats_boot(void); diff --git a/src/ctl.c b/src/ctl.c index ae17d44..c3c029f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3522,7 +3522,7 @@ label_return: typedef struct write_cb_packet_s write_cb_packet_t; struct write_cb_packet_s { - void (*write_cb)(void *, const char *); + write_cb_t *write_cb; void *cbopaque; }; diff --git a/src/malloc_io.c b/src/malloc_io.c index 4b7d2e4..d2879bb 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -619,8 +619,8 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) { } void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) { +malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + va_list ap) { char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { @@ -643,8 +643,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, */ JEMALLOC_FORMAT_PRINTF(3, 4) void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) { +malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) { va_list ap; va_start(ap, format); diff --git a/src/prof_recent.c b/src/prof_recent.c index 7fd77e9..cd72bda 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -444,8 +444,7 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { #define PROF_RECENT_PRINT_BUFSIZE 4096 void -prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), - void *cbopaque) { +prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); diff --git a/src/stats.c b/src/stats.c index dd31032..0a1a99d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1431,8 +1431,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, } void -stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) { +stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { int err; uint64_t epoch; size_t u64sz; -- cgit v0.12 From 4d970f8bfca76e55abd34ba461a738744d71e879 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 14:52:01 -0700 Subject: Add forking handling for counter module --- include/jemalloc/internal/counter.h | 3 +++ include/jemalloc/internal/lockedint.h | 8 ++++++++ src/counter.c | 15 +++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 896fd02..c26a08b 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -52,5 +52,8 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { } bool counter_accum_init(counter_accum_t *counter, uint64_t interval); +void counter_prefork(tsdn_t *tsdn, counter_accum_t *counter); +void counter_postfork_parent(tsdn_t *tsdn, counter_accum_t *counter); +void counter_postfork_child(tsdn_t *tsdn, counter_accum_t *counter); #endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 6a1f9ad..56cf646 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -31,12 +31,20 @@ struct locked_zu_s { # define LOCKEDINT_MTX(mtx) (&(mtx)) # define LOCKEDINT_MTX_LOCK(tsdn, mu) 
malloc_mutex_lock(tsdn, &(mu)) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ + malloc_mutex_postfork_parent(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ + malloc_mutex_postfork_child(tsdn, &(mu)) #else # define LOCKEDINT_MTX_DECLARE(name) # define LOCKEDINT_MTX(ptr) NULL # define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false # define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif static inline uint64_t diff --git a/src/counter.c b/src/counter.c index 1b8201e..6fa9c65 100644 --- a/src/counter.c +++ b/src/counter.c @@ -20,3 +20,18 @@ counter_accum_init(counter_accum_t *counter, uint64_t interval) { return false; } + +void +counter_prefork(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_PREFORK(tsdn, counter->mtx); +} + +void +counter_postfork_parent(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, counter->mtx); +} + +void +counter_postfork_child(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, counter->mtx); +} -- cgit v0.12 From 508303077b020ba369ab84e3cf233ae224da861b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 14:58:58 -0700 Subject: Add forking handling for prof idump counter --- src/prof.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prof.c b/src/prof.c index cb71850..c1e13e9 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1111,6 +1111,7 @@ prof_prefork0(tsdn_t *tsdn) { void prof_prefork1(tsdn_t *tsdn) { if (config_prof && opt_prof) { + counter_prefork(tsdn, &prof_idump_accumulated); malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); @@ -1132,6 +1133,7 @@ prof_postfork_parent(tsdn_t *tsdn) { malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); + counter_postfork_parent(tsdn, &prof_idump_accumulated); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } @@ -1156,6 +1158,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); + counter_postfork_child(tsdn, &prof_idump_accumulated); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } -- cgit v0.12 From f533ab6da623303de5f6621b35e5ec73832a6d22 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 15:09:32 -0700 Subject: Add forking handling for stats --- include/jemalloc/internal/stats.h | 3 +++ src/jemalloc.c | 3 +++ src/stats.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 3720619..7cd1430 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -44,5 +44,8 @@ bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); bool stats_boot(void); +void stats_prefork(tsdn_t *tsdn); +void stats_postfork_parent(tsdn_t *tsdn); 
+void stats_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 14b2a08..78da45b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3989,6 +3989,7 @@ _malloc_prefork(void) } } prof_prefork1(tsd_tsdn(tsd)); + stats_prefork(tsd_tsdn(tsd)); tsd_prefork(tsd); } @@ -4016,6 +4017,7 @@ _malloc_postfork(void) witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ + stats_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -4045,6 +4047,7 @@ jemalloc_postfork_child(void) { witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ + stats_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/stats.c b/src/stats.c index 0a1a99d..56d3b48 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1526,3 +1526,18 @@ stats_boot(void) { return counter_accum_init(&stats_interval_accumulated, stats_interval); } + +void +stats_prefork(tsdn_t *tsdn) { + counter_prefork(tsdn, &stats_interval_accumulated); +} + +void +stats_postfork_parent(tsdn_t *tsdn) { + counter_postfork_parent(tsdn, &stats_interval_accumulated); +} + +void +stats_postfork_child(tsdn_t *tsdn) { + counter_postfork_child(tsdn, &stats_interval_accumulated); +} -- cgit v0.12 From b543c20a9494eb8ace71742657f90d81e6df9f49 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Apr 2020 14:52:20 -0700 Subject: Minor update to locked int --- include/jemalloc/internal/arena_stats.h | 2 +- include/jemalloc/internal/lockedint.h | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 9effa61..02c9340 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -90,7 +90,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { assert(((char *)arena_stats)[i] == 0); } } - if (LOCKEDINT_MTX_INIT(LOCKEDINT_MTX(arena_stats->mtx), "arena_stats", + if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats", WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 56cf646..9d9d521 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -26,8 +26,8 @@ struct locked_zu_s { #ifndef JEMALLOC_ATOMIC_U64 # define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; -# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) \ - malloc_mutex_init(ptr, name, rank, rank_mode) +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ + malloc_mutex_init(&(mu), name, rank, rank_mode) # define LOCKEDINT_MTX(mtx) (&(mtx)) # define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) @@ -38,21 +38,28 @@ struct locked_zu_s { malloc_mutex_postfork_child(tsdn, &(mu)) #else # define LOCKEDINT_MTX_DECLARE(name) -# define LOCKEDINT_MTX(ptr) NULL -# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false -# define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX(mtx) NULL +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) +# define LOCKEDINT_MTX_UNLOCK(tsdn, 
mu) # define LOCKEDINT_MTX_PREFORK(tsdn, mu) # define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) # define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif +#ifdef JEMALLOC_ATOMIC_U64 +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) +#else +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ + malloc_mutex_assert_owner(tsdn, (mtx)) +#endif + static inline uint64_t locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(&p->val, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); return p->val; #endif } @@ -60,10 +67,10 @@ locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { static inline void locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); p->val += x; #endif } @@ -71,11 +78,11 @@ locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, static inline void locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); assert(r - x <= r); #else - malloc_mutex_assert_owner(tsdn, mtx); p->val -= x; assert(p->val + x >= p->val); #endif @@ -108,10 +115,10 @@ locked_read_u64_unsynchronized(locked_u64_t *p) { static inline size_t locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_zu(&p->val, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); return atomic_load_zu(&p->val, ATOMIC_RELAXED); #endif } @@ -119,10 +126,10 @@ locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { static inline void locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED); #endif @@ -131,11 +138,11 @@ locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, static inline void locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); assert(r - x <= r); #else - malloc_mutex_assert_owner(tsdn, mtx); size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED); #endif -- cgit v0.12 From fc052ff7284ef3695b81b9127f7d8a7cb25ae0b2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Apr 2020 15:08:00 -0700 Subject: Migrate counter to use locked int --- include/jemalloc/internal/counter.h | 49 +++++++++-------------------------- include/jemalloc/internal/lockedint.h | 38 +++++++++++++++++++++++++++ src/counter.c | 10 ++----- test/unit/counter.c | 7 +---- 4 files changed, 53 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index c26a08b..79abf06 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -4,50 +4,25 @@ #include "jemalloc/internal/mutex.h" typedef struct counter_accum_s { -#ifndef 
JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; - uint64_t accumbytes; -#else - atomic_u64_t accumbytes; -#endif + LOCKEDINT_MTX_DECLARE(mtx) + locked_u64_t accumbytes; uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool -counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { - bool overflow; - uint64_t a0, a1; - +counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { + uint64_t interval = counter->interval; + assert(interval > 0); + LOCKEDINT_MTX_LOCK(tsdn, counter->mtx); /* * If the event moves fast enough (and/or if the event handling is slow - * enough), extreme overflow here (a1 >= interval * 2) can cause counter - * trigger coalescing. This is an intentional mechanism that avoids - * rate-limiting allocation. + * enough), extreme overflow can cause counter trigger coalescing. + * This is an intentional mechanism that avoids rate-limiting + * allocation. */ - uint64_t interval = counter->interval; - assert(interval > 0); -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= interval); - if (overflow) { - a1 %= interval; - } - } while (!atomic_compare_exchange_weak_u64(&counter->accumbytes, &a0, a1, - ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &counter->mtx); - a0 = counter->accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= interval); - if (overflow) { - a1 %= interval; - } - counter->accumbytes = a1; - malloc_mutex_unlock(tsdn, &counter->mtx); -#endif + bool overflow = locked_inc_mod_u64(tsdn, LOCKEDINT_MTX(counter->mtx), + &counter->accumbytes, bytes, interval); + LOCKEDINT_MTX_UNLOCK(tsdn, counter->mtx); return overflow; } diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 9d9d521..d020ebe 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -88,6 +88,36 @@ locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, #endif } +/* Increment and take modulus. Returns whether the modulo made any change. */ +static inline bool +locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + const uint64_t x, const uint64_t modulus) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); + uint64_t before, after; + bool overflow; +#ifdef JEMALLOC_ATOMIC_U64 + before = atomic_load_u64(&p->val, ATOMIC_RELAXED); + do { + after = before + x; + assert(after >= before); + overflow = (after >= modulus); + if (overflow) { + after %= modulus; + } + } while (!atomic_compare_exchange_weak_u64(&p->val, &before, after, + ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + before = p->val; + after = before + x; + overflow = (after >= modulus); + if (overflow) { + after %= modulus; + } + p->val = after; +#endif + return overflow; +} + /* * Non-atomically sets *dst += src. *dst needs external synchronization. 
* This lets us avoid the cost of a fetch_add when its unnecessary (note that @@ -110,7 +140,15 @@ locked_read_u64_unsynchronized(locked_u64_t *p) { #else return p->val; #endif +} +static inline void +locked_init_u64_unsynchronized(locked_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_store_u64(&p->val, x, ATOMIC_RELAXED); +#else + p->val = x; +#endif } static inline size_t diff --git a/src/counter.c b/src/counter.c index 6fa9c65..71eda69 100644 --- a/src/counter.c +++ b/src/counter.c @@ -6,18 +6,12 @@ bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&counter->mtx, "counter_accum", + if (LOCKEDINT_MTX_INIT(counter->mtx, "counter_accum", WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { return true; } - counter->accumbytes = 0; -#else - atomic_store_u64(&counter->accumbytes, 0, - ATOMIC_RELAXED); -#endif + locked_init_u64_unsynchronized(&counter->accumbytes, 0); counter->interval = interval; - return false; } diff --git a/test/unit/counter.c b/test/unit/counter.c index c14eee3..277baac 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -27,12 +27,7 @@ TEST_END void expect_counter_value(counter_accum_t *c, uint64_t v) { - uint64_t accum; -#ifdef JEMALLOC_ATOMIC_U64 - accum = atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); -#else - accum = c->accumbytes; -#endif + uint64_t accum = locked_read_u64_unsynchronized(&c->accumbytes); expect_u64_eq(accum, v, "Counter value mismatch"); } -- cgit v0.12 From 855d20f6f3d79d00fad35d63456fbdc0e02a0747 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 23 Mar 2020 14:57:20 -0700 Subject: Remove outdated comments in thread event --- include/jemalloc/internal/thread_event.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index b05ff25..229136b 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -186,11 +186,9 @@ te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { * of thread event handling that we can rely on and need to promise. * The invariants are only temporarily violated in the middle of: * (a) event_advance() if an event is triggered (the te_event_trigger() call - * at the end will restore the invariants), + * at the end will restore the invariants), or * (b) te_##event##_event_update() (the te_event_update() call at the - * end will restore the invariants), or - * (c) te_alloc_rollback() if the rollback falls below the last_event - * (the te_event_update() call at the end will restore the invariants). + * end will restore the invariants). 
*/ JEMALLOC_ALWAYS_INLINE void te_assert_invariants(tsd_t *tsd) { -- cgit v0.12 From 1e2524e15a004af50fd79f79b4b6efcfce0164b8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 23 Mar 2020 14:58:33 -0700 Subject: Do not reset sample wait time when re-initing tdata --- include/jemalloc/internal/prof_data.h | 2 +- src/prof.c | 4 ++-- src/prof_data.c | 6 +----- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 95dc6b0..46a3510 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -13,7 +13,7 @@ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); + uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); diff --git a/src/prof.c b/src/prof.c index c1e13e9..2e1d768 100644 --- a/src/prof.c +++ b/src/prof.c @@ -795,7 +795,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); + NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } prof_tdata_t * @@ -808,7 +808,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { prof_tdata_detach(tsd, tdata); return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active, true); + active); } void diff --git a/src/prof_data.c b/src/prof_data.c index 9721cbe..66ed36a 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1245,7 +1245,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active, bool reset_interval) { + char *thread_name, bool active) { assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata; @@ -1274,10 +1274,6 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - if (reset_interval) { - prof_sample_threshold_update(tsd); - } - tdata->enq = false; tdata->enq_idump = false; tdata->enq_gdump = false; -- cgit v0.12 From 733ae918f0d848a64e88e622e348749fe6756d89 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 10:49:08 -0700 Subject: Extract out per event new wait time fetching --- include/jemalloc/internal/prof_externs.h | 5 +++-- include/jemalloc/internal/stats.h | 3 ++- include/jemalloc/internal/tcache_externs.h | 8 ++++++-- src/prof.c | 17 +++++++---------- src/stats.c | 2 +- src/tcache.c | 10 ++++++++++ src/thread_event.c | 24 +++++++++++++++++++----- 7 files changed, 48 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a6b659c..2284ae6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -74,7 +74,6 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); @@ -99,7 +98,9 @@ void 
prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(tsd_t *tsd); +/* Only accessed by thread event. */ +uint64_t prof_sample_new_event_wait(tsd_t *tsd); +bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 7cd1430..42c321e 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -37,7 +37,8 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; #define STATS_INTERVAL_ACCUM_LG_BATCH_SIZE 6 #define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) -uint64_t stats_interval_accum_batch_size(void); +/* Only accessed by thread event. */ +uint64_t stats_interval_new_event_wait(tsd_t *tsd); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 7ca38d6..7fd730d 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,8 +26,6 @@ extern cache_bin_info_t *tcache_bin_info; extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, - tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); @@ -55,4 +53,10 @@ bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_assert_initialized(tcache_t *tcache); +/* Only accessed by thread event. */ +uint64_t tcache_gc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); +void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, + tcache_t *tcache); + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 2e1d768..9405585 100644 --- a/src/prof.c +++ b/src/prof.c @@ -518,16 +518,11 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { * (e.g. * -mno-sse) in order for the workaround to be complete. */ -void -prof_sample_threshold_update(tsd_t *tsd) { +uint64_t +prof_sample_new_event_wait(tsd_t *tsd) { #ifdef JEMALLOC_PROF - if (!config_prof) { - return; - } - if (lg_prof_sample == 0) { - te_prof_sample_event_update(tsd, TE_MIN_START_WAIT); - return; + return TE_MIN_START_WAIT; } /* @@ -557,10 +552,12 @@ prof_sample_threshold_update(tsd_t *tsd) { */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); double u = (r == 0U) ? 
1.0 : (double)r * (1.0/9007199254740992.0L); - uint64_t bytes_until_sample = (uint64_t)(log(u) / + return (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - te_prof_sample_event_update(tsd, bytes_until_sample); +#else + not_reached(); + return TE_MAX_START_WAIT; #endif } diff --git a/src/stats.c b/src/stats.c index 56d3b48..9d13f59 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1499,7 +1499,7 @@ stats_interval_accum(tsd_t *tsd, uint64_t bytes) { } uint64_t -stats_interval_accum_batch_size(void) { +stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; } diff --git a/src/tcache.c b/src/tcache.c index 63e1a4d..cba2ea7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -40,6 +40,16 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { return arena_salloc(tsdn, ptr); } +uint64_t +tcache_gc_new_event_wait(tsd_t *tsd) { + return TCACHE_GC_INCR_BYTES; +} + +uint64_t +tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { + return TCACHE_GC_INCR_BYTES; +} + void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; diff --git a/src/thread_event.c b/src/thread_event.c index c96dea6..99467ee 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,6 +4,17 @@ #include "jemalloc/internal/thread_event.h" +/* + * Signatures for functions computing new event wait time. The functions + * should be defined by the modules owning each event. The signatures here are + * used to verify that the definitions are in the right shape. + */ +#define E(event, condition_unused, is_alloc_event_unused) \ +uint64_t event##_new_event_wait(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ static void te_tsd_##event##_event_init(tsd_t *tsd); @@ -22,26 +33,29 @@ ITERATE_OVER_ALL_EVENTS static void te_tsd_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + uint64_t wait = tcache_gc_new_event_wait(tsd); + te_tcache_gc_event_update(tsd, wait); } static void te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); + uint64_t wait = tcache_gc_dalloc_new_event_wait(tsd); + te_tcache_gc_dalloc_event_update(tsd, wait); } static void te_tsd_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); - prof_sample_threshold_update(tsd); + uint64_t wait = prof_sample_new_event_wait(tsd); + te_prof_sample_event_update(tsd, wait); } static void te_tsd_stats_interval_event_init(tsd_t *tsd) { assert(opt_stats_interval >= 0); - uint64_t interval = stats_interval_accum_batch_size(); - te_stats_interval_event_update(tsd, interval); + uint64_t wait = stats_interval_new_event_wait(tsd); + te_stats_interval_event_update(tsd, wait); } /* Handler functions. 
*/ -- cgit v0.12 From 6de77799de0d8a705c595aa11f9dc70f147501ad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Mar 2020 08:31:34 -0700 Subject: Move thread event wait time update to local --- include/jemalloc/internal/thread_event.h | 24 ------------------------ src/thread_event.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 229136b..60fbfcb 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,7 +33,6 @@ typedef struct te_ctx_s { void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); -void te_event_update(tsd_t *tsd, bool alloc_event); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); @@ -251,27 +250,4 @@ thread_alloc_event(tsd_t *tsd, size_t usize) { te_event_advance(tsd, usize, true); } -#define E(event, condition, is_alloc) \ -JEMALLOC_ALWAYS_INLINE void \ -te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - te_assert_invariants(tsd); \ - assert(condition); \ - assert(tsd_nominal(tsd)); \ - assert(tsd_reentrancy_level_get(tsd) == 0); \ - assert(event_wait > 0U); \ - if (TE_MIN_START_WAIT > 1U && \ - unlikely(event_wait < TE_MIN_START_WAIT)) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ - if (TE_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > TE_MAX_START_WAIT)) { \ - event_wait = TE_MAX_START_WAIT; \ - } \ - event##_event_wait_set(tsd, event_wait); \ - te_event_update(tsd, is_alloc); \ -} - -ITERATE_OVER_ALL_EVENTS -#undef E - #endif /* JEMALLOC_INTERNAL_THREAD_EVENT_H */ diff --git a/src/thread_event.c b/src/thread_event.c index 99467ee..8f718dd 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,6 +4,31 @@ #include "jemalloc/internal/thread_event.h" +static void te_event_update(tsd_t *tsd, bool alloc_event); + +#define E(event, condition, is_alloc) \ +static void \ +te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + te_assert_invariants(tsd); \ + assert(condition); \ + assert(tsd_nominal(tsd)); \ + assert(tsd_reentrancy_level_get(tsd) == 0); \ + assert(event_wait > 0U); \ + if (TE_MIN_START_WAIT > 1U && \ + unlikely(event_wait < TE_MIN_START_WAIT)) { \ + event_wait = TE_MIN_START_WAIT; \ + } \ + if (TE_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > TE_MAX_START_WAIT)) { \ + event_wait = TE_MAX_START_WAIT; \ + } \ + event##_event_wait_set(tsd, event_wait); \ + te_event_update(tsd, is_alloc); \ +} + +ITERATE_OVER_ALL_EVENTS +#undef E + /* * Signatures for functions computing new event wait time. The functions * should be defined by the modules owning each event. The signatures here are -- cgit v0.12 From 7324c4f85f8d3d9597a1942dffcc6bf98b02fb8c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 10:00:46 -0700 Subject: Break down event init and handler functions --- src/thread_event.c | 66 +++++++++--------------------------------------------- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/src/thread_event.c b/src/thread_event.c index 8f718dd..d5649df 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -40,53 +40,16 @@ uint64_t event##_new_event_wait(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* TSD event init function signatures. 
*/ -#define E(event, condition_unused, is_alloc_event_unused) \ -static void te_tsd_##event##_event_init(tsd_t *tsd); - -ITERATE_OVER_ALL_EVENTS -#undef E - /* Event handler function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void te_##event##_event_handler(tsd_t *tsd); +static void event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* (Re)Init functions. */ -static void -te_tsd_tcache_gc_event_init(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); - uint64_t wait = tcache_gc_new_event_wait(tsd); - te_tcache_gc_event_update(tsd, wait); -} - -static void -te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); - uint64_t wait = tcache_gc_dalloc_new_event_wait(tsd); - te_tcache_gc_dalloc_event_update(tsd, wait); -} - -static void -te_tsd_prof_sample_event_init(tsd_t *tsd) { - assert(config_prof && opt_prof); - uint64_t wait = prof_sample_new_event_wait(tsd); - te_prof_sample_event_update(tsd, wait); -} - -static void -te_tsd_stats_interval_event_init(tsd_t *tsd) { - assert(opt_stats_interval >= 0); - uint64_t wait = stats_interval_new_event_wait(tsd); - te_stats_interval_event_update(tsd, wait); -} - /* Handler functions. */ static void tcache_gc_event(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); @@ -95,45 +58,35 @@ tcache_gc_event(tsd_t *tsd) { } static void -te_tcache_gc_event_handler(tsd_t *tsd) { - assert(tcache_gc_event_wait_get(tsd) == 0U); - te_tsd_tcache_gc_event_init(tsd); +tcache_gc_event_handler(tsd_t *tsd) { tcache_gc_event(tsd); } static void -te_tcache_gc_dalloc_event_handler(tsd_t *tsd) { - assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); - te_tsd_tcache_gc_dalloc_event_init(tsd); +tcache_gc_dalloc_event_handler(tsd_t *tsd) { tcache_gc_event(tsd); } static void -te_prof_sample_event_handler(tsd_t *tsd) { - assert(config_prof && opt_prof); - assert(prof_sample_event_wait_get(tsd) == 0U); +prof_sample_event_handler(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_sample_event = prof_sample_last_event_get(tsd); prof_sample_last_event_set(tsd, last_event); if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { prof_idump(tsd_tsdn(tsd)); } - te_tsd_prof_sample_event_init(tsd); } static void -te_stats_interval_event_handler(tsd_t *tsd) { - assert(opt_stats_interval >= 0); - assert(stats_interval_event_wait_get(tsd) == 0U); +stats_interval_event_handler(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_stats_event = stats_interval_last_event_get(tsd); stats_interval_last_event_set(tsd, last_event); - if (stats_interval_accum(tsd, last_event - last_stats_event)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } - te_tsd_stats_interval_event_init(tsd); } + /* Per event facilities done. 
*/ static bool @@ -352,7 +305,9 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { if (is_alloc == alloc_event && condition && \ event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ - te_##event##_event_handler(tsd); \ + uint64_t wait = event##_new_event_wait(tsd); \ + te_##event##_event_update(tsd, wait); \ + event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS @@ -384,7 +339,8 @@ void tsd_te_init(tsd_t *tsd) { #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ - te_tsd_##event##_event_init(tsd); \ + uint64_t wait = event##_new_event_wait(tsd); \ + te_##event##_event_update(tsd, wait); \ } ITERATE_OVER_ALL_EVENTS -- cgit v0.12 From f72014d09773c529e863eab653331461a740c60c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 25 Mar 2020 09:33:52 -0700 Subject: Only compute thread event threshold once per trigger --- include/jemalloc/internal/thread_event.h | 12 ++- src/thread_event.c | 140 ++++++++++++------------------- 2 files changed, 60 insertions(+), 92 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 60fbfcb..321baaa 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -32,7 +32,7 @@ typedef struct te_ctx_s { } te_ctx_t; void te_assert_invariants_debug(tsd_t *tsd); -void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); +void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); @@ -183,11 +183,9 @@ te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { * The function checks in debug mode whether the thread event counters are in * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. - * The invariants are only temporarily violated in the middle of: - * (a) event_advance() if an event is triggered (the te_event_trigger() call - * at the end will restore the invariants), or - * (b) te_##event##_event_update() (the te_event_update() call at the - * end will restore the invariants). + * The invariants are only temporarily violated in the middle of + * te_event_advance() if an event is triggered (the te_event_trigger() call at + * the end will restore the invariants). 
*/ JEMALLOC_ALWAYS_INLINE void te_assert_invariants(tsd_t *tsd) { @@ -236,7 +234,7 @@ te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) { te_assert_invariants(tsd); } else { - te_event_trigger(tsd, &ctx, false); + te_event_trigger(tsd, &ctx); } } diff --git a/src/thread_event.c b/src/thread_event.c index d5649df..0d71f32 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,31 +4,6 @@ #include "jemalloc/internal/thread_event.h" -static void te_event_update(tsd_t *tsd, bool alloc_event); - -#define E(event, condition, is_alloc) \ -static void \ -te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - te_assert_invariants(tsd); \ - assert(condition); \ - assert(tsd_nominal(tsd)); \ - assert(tsd_reentrancy_level_get(tsd) == 0); \ - assert(event_wait > 0U); \ - if (TE_MIN_START_WAIT > 1U && \ - unlikely(event_wait < TE_MIN_START_WAIT)) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ - if (TE_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > TE_MAX_START_WAIT)) { \ - event_wait = TE_MAX_START_WAIT; \ - } \ - event##_event_wait_set(tsd, event_wait); \ - te_event_update(tsd, is_alloc); \ -} - -ITERATE_OVER_ALL_EVENTS -#undef E - /* * Signatures for functions computing new event wait time. The functions * should be defined by the modules owning each event. The signatures here are @@ -246,33 +221,49 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, } static uint64_t -te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, - bool allow_event_trigger) { +te_clip_event_wait(uint64_t event_wait) { + assert(event_wait > 0U); + if (TE_MIN_START_WAIT > 1U && + unlikely(event_wait < TE_MIN_START_WAIT)) { + event_wait = TE_MIN_START_WAIT; + } + if (TE_MAX_START_WAIT < UINT64_MAX && + unlikely(event_wait > TE_MAX_START_WAIT)) { + event_wait = TE_MAX_START_WAIT; + } + return event_wait; +} + +void +te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { + /* usize has already been added to thread_allocated. */ + uint64_t bytes_after = te_ctx_current_bytes_get(ctx); + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); + + te_ctx_last_event_set(ctx, bytes_after); + + bool allow_event_trigger = tsd_nominal(tsd) && + tsd_reentrancy_level_get(tsd) == 0; + bool is_alloc = ctx->is_alloc; uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ + bool is_##event##_triggered = false; \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ + } else if (!allow_event_trigger) { \ + event_wait = TE_MIN_START_WAIT; \ } else { \ - event_wait = 0U; \ - if (!allow_event_trigger) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ + is_##event##_triggered = true; \ + event_wait = event##_new_event_wait(tsd); \ } \ - assert(event_wait <= TE_MAX_START_WAIT); \ + event_wait = te_clip_event_wait(event_wait); \ event##_event_wait_set(tsd, event_wait); \ - /* \ - * If there is a single event, then the remaining wait \ - * time may become zero, and we rely on either the \ - * event handler or a te_event_update() call later \ - * to properly set next_event; if there are multiple \ - * events, then here we can get the minimum remaining \ - * wait time to the next already set event. 
\ - */ \ - if (event_wait > 0U && event_wait < wait) { \ + if (event_wait < wait) { \ wait = event_wait; \ } \ } @@ -281,68 +272,47 @@ te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, #undef E assert(wait <= TE_MAX_START_WAIT); - return wait; -} - -void -te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { - /* usize has already been added to thread_allocated. */ - uint64_t bytes_after = te_ctx_current_bytes_get(ctx); - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); - - te_ctx_last_event_set(ctx, bytes_after); - bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && - tsd_reentrancy_level_get(tsd) == 0; - - bool is_alloc = ctx->is_alloc; - uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc, - allow_event_trigger); te_adjust_thresholds_helper(tsd, ctx, wait); - te_assert_invariants(tsd); + #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition && \ - event##_event_wait_get(tsd) == 0U) { \ + is_##event##_triggered) { \ assert(allow_event_trigger); \ - uint64_t wait = event##_new_event_wait(tsd); \ - te_##event##_event_update(tsd, wait); \ event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS #undef E + te_assert_invariants(tsd); } -void -te_event_update(tsd_t *tsd, bool is_alloc) { +static void +te_init(tsd_t *tsd, bool is_alloc) { + uint64_t wait = TE_MAX_START_WAIT; +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ + uint64_t event_wait = event##_new_event_wait(tsd); \ + event_wait = te_clip_event_wait(event_wait); \ + event##_event_wait_set(tsd, event_wait); \ + if (event_wait < wait) { \ + wait = event_wait; \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E te_ctx_t ctx; te_ctx_get(tsd, &ctx, is_alloc); - - uint64_t wait = te_next_event_compute(tsd, is_alloc); te_adjust_thresholds_helper(tsd, &ctx, wait); - - uint64_t last_event = te_ctx_last_event_get(&ctx); - /* Both subtractions are intentionally susceptible to underflow. */ - if (te_ctx_current_bytes_get(&ctx) - last_event >= - te_ctx_next_event_get(&ctx) - last_event) { - te_event_trigger(tsd, &ctx, true); - } else { - te_assert_invariants(tsd); - } } -void tsd_te_init(tsd_t *tsd) { +void +tsd_te_init(tsd_t *tsd) { /* Make sure no overflow for the bytes accumulated on event_trigger. 
*/ assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - -#define E(event, condition, is_alloc_event_unused) \ - if (condition) { \ - uint64_t wait = event##_new_event_wait(tsd); \ - te_##event##_event_update(tsd, wait); \ - } - - ITERATE_OVER_ALL_EVENTS -#undef E + te_init(tsd, true); + te_init(tsd, false); + te_assert_invariants(tsd); } -- cgit v0.12 From abd467493110efbcf92f0e85a699f9cda47daff7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 13:33:56 -0700 Subject: Extract out per event postponed wait time fetching --- include/jemalloc/internal/prof_externs.h | 2 ++ include/jemalloc/internal/stats.h | 1 + include/jemalloc/internal/tcache_externs.h | 2 ++ src/prof.c | 5 +++++ src/stats.c | 5 +++++ src/tcache.c | 10 ++++++++++ src/thread_event.c | 17 ++++++++++++----- 7 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 2284ae6..d500f56 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,8 +98,10 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); + /* Only accessed by thread event. */ uint64_t prof_sample_new_event_wait(tsd_t *tsd); +uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 42c321e..4989fe1 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -39,6 +39,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; /* Only accessed by thread event. */ uint64_t stats_interval_new_event_wait(tsd_t *tsd); +uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 7fd730d..3be6528 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -55,7 +55,9 @@ void tcache_assert_initialized(tcache_t *tcache); /* Only accessed by thread event. 
*/ uint64_t tcache_gc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache); diff --git a/src/prof.c b/src/prof.c index 9405585..ad83cfe 100644 --- a/src/prof.c +++ b/src/prof.c @@ -561,6 +561,11 @@ prof_sample_new_event_wait(tsd_t *tsd) { #endif } +uint64_t +prof_sample_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + int prof_getpid(void) { #ifdef _WIN32 diff --git a/src/stats.c b/src/stats.c index 9d13f59..16d4e88 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1503,6 +1503,11 @@ stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; } +uint64_t +stats_interval_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index cba2ea7..16c87cb 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -46,10 +46,20 @@ tcache_gc_new_event_wait(tsd_t *tsd) { } uint64_t +tcache_gc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { return TCACHE_GC_INCR_BYTES; } +uint64_t +tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; diff --git a/src/thread_event.c b/src/thread_event.c index 0d71f32..9de8376 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -5,12 +5,19 @@ #include "jemalloc/internal/thread_event.h" /* - * Signatures for functions computing new event wait time. The functions - * should be defined by the modules owning each event. The signatures here are - * used to verify that the definitions are in the right shape. + * Signatures for functions computing new / postponed event wait time. New + * event wait time is the time till the next event if an event is currently + * being triggered; postponed event wait time is the time till the next event + * if an event should be triggered but needs to be postponed, e.g. when the TSD + * is not nominal or during reentrancy. + * + * These event wait time computation functions should be defined by the modules + * owning each event. The signatures here are used to verify that the + * definitions follow the right format. 
*/ #define E(event, condition_unused, is_alloc_event_unused) \ -uint64_t event##_new_event_wait(tsd_t *tsd); +uint64_t event##_new_event_wait(tsd_t *tsd); \ +uint64_t event##_postponed_event_wait(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E @@ -256,7 +263,7 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ } else if (!allow_event_trigger) { \ - event_wait = TE_MIN_START_WAIT; \ + event_wait = event##_postponed_event_wait(tsd); \ } else { \ is_##event##_triggered = true; \ event_wait = event##_new_event_wait(tsd); \ -- cgit v0.12 From 381c97caa41eb85b52afca40794b2223e7f36d33 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 13:37:19 -0700 Subject: Treat postponed prof sample event as new event --- src/prof.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index ad83cfe..77aa44d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -563,7 +563,15 @@ prof_sample_new_event_wait(tsd_t *tsd) { uint64_t prof_sample_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; + /* + * The postponed wait time for prof sample event is computed as if we + * want a new wait time (i.e. as if the event were triggered). If we + * instead postpone to the immediate next allocation, like how we're + * handling the other events, then we can have sampling bias, if e.g. + * the allocation immediately following a reentrancy always comes from + * the same stack trace. + */ + return prof_sample_new_event_wait(tsd); } int -- cgit v0.12 From b06dfb9ccc1fb942c6d871a8e184fed496b59fc1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 10:38:06 -0700 Subject: Push event handlers to constituent modules --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/stats.h | 2 +- include/jemalloc/internal/tcache_externs.h | 4 +- include/jemalloc/internal/thread_event.h | 6 +++ src/prof.c | 25 +++++++------ src/stats.c | 14 ++++--- src/tcache.c | 22 ++++++++++- src/thread_event.c | 60 +++++++++++++----------------- 8 files changed, 78 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d500f56..3d9fcfb 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -102,7 +102,7 @@ void prof_postfork_child(tsdn_t *tsdn); /* Only accessed by thread event. */ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); -bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); +void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 4989fe1..93bde22 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -40,7 +40,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; /* Only accessed by thread event. */ uint64_t stats_interval_new_event_wait(tsd_t *tsd); uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); -bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); +void stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed); /* Implements je_malloc_stats_print. 
*/ void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 3be6528..6eca928 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -56,9 +56,9 @@ void tcache_assert_initialized(tcache_t *tcache); /* Only accessed by thread event. */ uint64_t tcache_gc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); +void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed); uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); -void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, - tcache_t *tcache); +void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 321baaa..3a84882 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -23,6 +23,12 @@ */ #define TE_MAX_INTERVAL ((uint64_t)(4U << 20)) +/* + * Invalid elapsed time, for situations where elapsed time is not needed. See + * comments in thread_event.c for more info. + */ +#define TE_INVALID_ELAPSED UINT64_MAX + typedef struct te_ctx_s { bool is_alloc; uint64_t *current; diff --git a/src/prof.c b/src/prof.c index 77aa44d..c8da81d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -50,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_experimental_use_sys_thread_name = false; -/* Accessed via prof_idump_accum(). */ +/* Accessed via prof_sample_event_handler(). */ static counter_accum_t prof_idump_accumulated; /* @@ -574,6 +574,18 @@ prof_sample_postponed_event_wait(tsd_t *tsd) { return prof_sample_new_event_wait(tsd); } +void +prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { + cassert(config_prof); + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { + prof_idump(tsd_tsdn(tsd)); + } +} + int prof_getpid(void) { #ifdef _WIN32 @@ -659,17 +671,6 @@ prof_idump_accum_init(void) { } bool -prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return false; - } - - return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); -} - -bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); diff --git a/src/stats.c b/src/stats.c index 16d4e88..42e4a1c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1493,11 +1493,6 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { emitter_end(&emitter); } -bool -stats_interval_accum(tsd_t *tsd, uint64_t bytes) { - return counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, bytes); -} - uint64_t stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; @@ -1508,6 +1503,15 @@ stats_interval_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } +void +stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, + elapsed)) { + je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); + } +} + bool stats_boot(void) { uint64_t stats_interval; 
diff --git a/src/tcache.c b/src/tcache.c index 16c87cb..f8188cb 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -60,8 +60,14 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -void -tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { +static void +tcache_event(tsd_t *tsd) { + tcache_t *tcache = tcache_get(tsd); + if (tcache == NULL) { + return; + } + + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); szind_t binind = tcache_slow->next_gc_bin; bool is_small = (binind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[binind]; @@ -110,6 +116,18 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { } } +void +tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed == TE_INVALID_ELAPSED); + tcache_event(tsd); +} + +void +tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed == TE_INVALID_ELAPSED); + tcache_event(tsd); +} + void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, diff --git a/src/thread_event.c b/src/thread_event.c index 9de8376..40c0487 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -5,68 +5,59 @@ #include "jemalloc/internal/thread_event.h" /* - * Signatures for functions computing new / postponed event wait time. New + * Signatures for event specific functions. These functions should be defined + * by the modules owning each event. The signatures here verify that the + * definitions follow the right format. + * + * The first two are functions computing new / postponed event wait time. New * event wait time is the time till the next event if an event is currently * being triggered; postponed event wait time is the time till the next event * if an event should be triggered but needs to be postponed, e.g. when the TSD * is not nominal or during reentrancy. * - * These event wait time computation functions should be defined by the modules - * owning each event. The signatures here are used to verify that the - * definitions follow the right format. + * The third is the event handler function, which is called whenever an event + * is triggered. The parameter is the elapsed time since the last time an + * event of the same type was triggered. */ #define E(event, condition_unused, is_alloc_event_unused) \ uint64_t event##_new_event_wait(tsd_t *tsd); \ -uint64_t event##_postponed_event_wait(tsd_t *tsd); +uint64_t event##_postponed_event_wait(tsd_t *tsd); \ +void event##_event_handler(tsd_t *tsd, uint64_t elapsed); ITERATE_OVER_ALL_EVENTS #undef E -/* Event handler function signatures. */ +/* Signatures for internal functions fetching elapsed time. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void event##_event_handler(tsd_t *tsd); +static uint64_t event##_fetch_elapsed(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* Handler functions. 
*/ -static void -tcache_gc_event(tsd_t *tsd) { - tcache_t *tcache = tcache_get(tsd); - if (tcache != NULL) { - tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_event_hard(tsd, tcache_slow, tcache); - } -} - -static void -tcache_gc_event_handler(tsd_t *tsd) { - tcache_gc_event(tsd); +static uint64_t +tcache_gc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; } -static void -tcache_gc_dalloc_event_handler(tsd_t *tsd) { - tcache_gc_event(tsd); +static uint64_t +tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; } -static void -prof_sample_event_handler(tsd_t *tsd) { +static uint64_t +prof_sample_fetch_elapsed(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_sample_event = prof_sample_last_event_get(tsd); prof_sample_last_event_set(tsd, last_event); - if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { - prof_idump(tsd_tsdn(tsd)); - } + return last_event - last_sample_event; } -static void -stats_interval_event_handler(tsd_t *tsd) { +static uint64_t +stats_interval_fetch_elapsed(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_stats_event = stats_interval_last_event_get(tsd); stats_interval_last_event_set(tsd, last_event); - if (stats_interval_accum(tsd, last_event - last_stats_event)) { - je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); - } + return last_event - last_stats_event; } /* Per event facilities done. */ @@ -286,7 +277,8 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { if (is_alloc == alloc_event && condition && \ is_##event##_triggered) { \ assert(allow_event_trigger); \ - event##_event_handler(tsd); \ + uint64_t elapsed = event##_fetch_elapsed(tsd); \ + event##_event_handler(tsd, elapsed); \ } ITERATE_OVER_ALL_EVENTS -- cgit v0.12 From 75dae934a167424f0dad663e9f96fefdac25ae1b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 14:10:18 -0700 Subject: Always initialize TE counters in TSD init --- src/tsd.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/tsd.c b/src/tsd.c index c07a4bf..cc1b3ac 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -221,14 +221,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { te_recompute_fast_threshold(tsd); } -static bool -tsd_data_init(tsd_t *tsd) { - /* - * We initialize the rtree context first (before the tcache), since the - * tcache initialization depends on it. - */ - rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); - +static void +tsd_prng_state_init(tsd_t *tsd) { /* * A nondeterministic seed based on the address of tsd reduces * the likelihood of lockstep non-uniform cache index @@ -238,10 +232,17 @@ tsd_data_init(tsd_t *tsd) { */ *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; +} - /* event_init may use the prng state above. */ - tsd_te_init(tsd); - +static bool +tsd_data_init(tsd_t *tsd) { + /* + * We initialize the rtree context first (before the tcache), since the + * tcache initialization depends on it. + */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + tsd_prng_state_init(tsd); + tsd_te_init(tsd); /* event_init may use the prng state above. */ return tsd_tcache_enabled_data_init(tsd); } @@ -270,6 +271,8 @@ tsd_data_init_nocleanup(tsd_t *tsd) { *tsd_arenas_tdata_bypassp_get(tsd) = true; *tsd_tcache_enabledp_get_unsafe(tsd) = false; *tsd_reentrancy_levelp_get(tsd) = 1; + tsd_prng_state_init(tsd); + tsd_te_init(tsd); /* event_init may use the prng state above. 
*/ assert_tsd_data_cleanup_done(tsd); return false; -- cgit v0.12 From dcea2c0f8b91d045a58eed6b6b1935719c7acd4b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 11:50:29 -0700 Subject: Get rid of TSD -> thread event dependency --- include/jemalloc/internal/thread_event.h | 6 ++++++ include/jemalloc/internal/tsd.h | 18 ++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3a84882..5b5bb9f 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -6,6 +6,12 @@ /* "te" is short for "thread_event" */ /* + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. + */ +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX + +/* * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0f9ec12..18bdb8f 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -86,14 +86,14 @@ typedef ql_elm(tsd_t) tsd_link_t; /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ + /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ TE_MIN_START_WAIT, \ - /* tcache_gc_event_wait */ TE_MIN_START_WAIT, \ - /* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \ - /* prof_sample_event_wait */ TE_MIN_START_WAIT, \ + /* thread_deallocated_next_event */ 0, \ + /* tcache_gc_event_wait */ 0, \ + /* tcache_gc_dalloc_event_wait */ 0, \ + /* prof_sample_event_wait */ 0, \ /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ TE_MIN_START_WAIT, \ + /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ @@ -131,12 +131,6 @@ typedef ql_elm(tsd_t) tsd_link_t; /* test data */ MALLOC_TEST_TSD_INITIALIZER -/* - * TE_MIN_START_WAIT should not exceed the minimal allocation usize. - */ -#define TE_MIN_START_WAIT ((uint64_t)1U) -#define TE_MAX_START_WAIT UINT64_MAX - #define TSD_INITIALIZER { \ TSD_DATA_SLOW_INITIALIZER \ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ -- cgit v0.12 From 5dead37a9d38494341a6808bd09b8896282becc1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 12:20:30 -0700 Subject: Allow narenas:default. This can be useful when you know you want to override some lower-priority configuration setting with its default value, but don't know what that value would be. 
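
For example (an illustrative sketch rather than part of the change itself;
the "narenas:1" baseline below is hypothetical), an application that embeds
a config string via the usual malloc_conf global,

    const char *malloc_conf = "narenas:1";

can be switched back to the automatically chosen arena count at deployment
time through the environment, which is read after the embedded string and
therefore overrides it:

    MALLOC_CONF="narenas:default"

The "default" token parses to opt_narenas = 0, exactly as if narenas had
never been specified, so the operator does not need to know what the
default count would have been on the target machine.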
--- src/jemalloc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 78da45b..d5d54e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1282,9 +1282,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } - CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - false) + if (CONF_MATCH("narenas")) { + if (CONF_MATCH_VALUE("default")) { + opt_narenas = 0; + CONF_CONTINUE; + } else { + CONF_HANDLE_UNSIGNED(opt_narenas, + "narenas", 1, UINT_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + } + } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; -- cgit v0.12 From eda9c2858f267961d7e88cb3f3e841f197372125 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 12:42:04 -0700 Subject: Edata: zero stack edatas before initializing. This avoids some UB. No compilers take advantage of it for now, but no sense in tempting fate. --- include/jemalloc/internal/edata.h | 7 +++++++ src/emap.c | 2 +- src/extent_dss.c | 2 +- test/unit/rtree.c | 6 +++--- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 3a9ebc8..ac8d647 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -507,6 +507,13 @@ edata_is_head_set(edata_t *edata, bool is_head) { ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } +/* + * Because this function is implemented as a sequence of bitfield modifications, + * even though each individual bit is properly initialized, we technically read + * uninitialized data within it. This is mostly fine, since most callers get + * their edatas from zeroing sources, but callers who make stack edata_ts need + * to manually zero them. + */ static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, diff --git a/src/emap.c b/src/emap.c index 637d332..ec1b4cd 100644 --- a/src/emap.c +++ b/src/emap.c @@ -247,7 +247,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, * and commit state, and head status. This is a fake edata_t, used to * facilitate a lookup. 
*/ - edata_t lead; + edata_t lead = {0}; edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); diff --git a/src/extent_dss.c b/src/extent_dss.c index 18b6895..17a0822 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -198,7 +198,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - edata_t edata; + edata_t edata = {0}; ehooks_t *ehooks = arena_get_ehooks( arena); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 2802966..63d6e37 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -33,7 +33,7 @@ TEST_END #undef SEED TEST_BEGIN(test_rtree_extrema) { - edata_t edata_a, edata_b; + edata_t edata_a = {0}, edata_b = {0}; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); @@ -91,7 +91,7 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - edata_t edata; + edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); @@ -141,7 +141,7 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - edata_t edata; + edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); -- cgit v0.12 From 27f29e424ba9c4f8208e9dd98cb3d39eeb76d5ee Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 1 May 2020 22:08:37 +0100 Subject: LQ_QUANTUM should be 4 on mips64 hardware. This matches the ABI stack alignment requirements. --- include/jemalloc/internal/quantum.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 821086e..11e870a 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -34,7 +34,11 @@ # define LG_QUANTUM 3 # endif # ifdef __mips__ -# define LG_QUANTUM 3 +# if defined(__mips_n32) || defined(__mips_n64) +# define LG_QUANTUM 4 +# else +# define LG_QUANTUM 3 +# endif # endif # ifdef __nios2__ # define LG_QUANTUM 3 -- cgit v0.12 From 33372cbd4075e70b1e365a6dd6708edd0d68c3a4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 27 Apr 2020 20:28:17 +0100 Subject: cpu instruction spin wait for arm32/64 --- configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/configure.ac b/configure.ac index f67fc3d..98cb4bc 100644 --- a/configure.ac +++ b/configure.ac @@ -416,6 +416,16 @@ case "${host_cpu}" in fi fi ;; + aarch64|arm*) + HAVE_CPU_SPINWAIT=1 + AC_CACHE_VAL([je_cv_yield], + [JE_COMPILABLE([yield instruction], [], + [[__asm__ volatile("yield"); return 0;]], + [je_cv_yield])]) + if test "x${je_cv_yield}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("yield")' + fi + ;; *) HAVE_CPU_SPINWAIT=0 ;; -- cgit v0.12 From 97b7a9cf7702371d5f9827f71b6daf7eafe890ec Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 13:05:36 -0700 Subject: Add a fill/flush microbenchmark. 
--- Makefile.in | 1 + test/stress/fill_flush.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 test/stress/fill_flush.c diff --git a/Makefile.in b/Makefile.in index d35b74b..e7666fb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,6 +289,7 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ + $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c new file mode 100644 index 0000000..6ea3ff9 --- /dev/null +++ b/test/stress/fill_flush.c @@ -0,0 +1,77 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +#define SMALL_ALLOC_SIZE 128 +#define LARGE_ALLOC_SIZE SC_LARGE_MINCLASS +#define NALLOCS 1000 + +/* + * We make this volatile so the 1-at-a-time variants can't leave the allocation + * in a register, just to try to get the cache behavior closer. + */ +void *volatile allocs[NALLOCS]; + +static void +array_alloc_dalloc_small(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(SMALL_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + } + for (int i = 0; i < NALLOCS; i++) { + sdallocx(allocs[i], SMALL_ALLOC_SIZE, 0); + } +} + +static void +item_alloc_dalloc_small(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(SMALL_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + sdallocx(allocs[i], SMALL_ALLOC_SIZE, 0); + } +} + +TEST_BEGIN(test_array_vs_item_small) { + compare_funcs(1 * 1000, 10 * 1000, + "array of small allocations", array_alloc_dalloc_small, + "small item allocation", item_alloc_dalloc_small); +} +TEST_END + +static void +array_alloc_dalloc_large(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(LARGE_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + } + for (int i = 0; i < NALLOCS; i++) { + sdallocx(allocs[i], LARGE_ALLOC_SIZE, 0); + } +} + +static void +item_alloc_dalloc_large(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(LARGE_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + sdallocx(allocs[i], LARGE_ALLOC_SIZE, 0); + } +} + +TEST_BEGIN(test_array_vs_item_large) { + compare_funcs(100, 1000, + "array of large allocations", array_alloc_dalloc_large, + "large item allocation", item_alloc_dalloc_large); +} +TEST_END + + +int main(void) { + return test_no_reentrancy( + test_array_vs_item_small, + test_array_vs_item_large); +} -- cgit v0.12 From 634afc4124100b5ff11e892481d912d56099be1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 12:08:19 -0700 Subject: Tcache: Make size computation configurable. --- include/jemalloc/internal/tcache_externs.h | 1 + src/jemalloc.c | 6 +++ src/tcache.c | 60 ++++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 6eca928..67fdc00 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -3,6 +3,7 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/src/jemalloc.c b/src/jemalloc.c index d5d54e2..fbec733 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1373,6 +1373,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) + /* + * Anyone trying to set a value outside -16 to 16 is + * deeply confused. + */ + CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, + "lg_tcache_nslots_mul", -16, 16) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index f8188cb..a18d91d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -13,6 +13,16 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; +/* + * We attempt to make the number of slots in a tcache bin for a given size class + * equal to the number of objects in a slab times some multiplier. By default, + * the multiplier is 1/2 (i.e. we set the maximum number of objects in the + * tcache to half the number of objects in a slab). + * This is bounded by some other constraints as well, like the fact that it + * must be even, must be less than TCACHE_NSLOTS_SMALL_MAX, etc.. + */ +ssize_t opt_lg_tcache_nslots_mul = -1; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ @@ -778,6 +788,37 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { } } +static unsigned +tcache_ncached_max_compute(szind_t szind) { + if (szind >= SC_NBINS) { + assert(szind < nhbins); + return TCACHE_NSLOTS_LARGE; + } + unsigned slab_nregs = bin_infos[szind].nregs; + + unsigned candidate; + if (opt_lg_tcache_nslots_mul < 0) { + candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); + } else { + candidate = slab_nregs << opt_lg_tcache_nslots_mul; + } + if (candidate % 2 != 0) { + /* + * We need the candidate size to be even -- we assume that we + * can divide by two and get a positive number (e.g. when + * flushing). + */ + ++candidate; + } + if (candidate <= TCACHE_NSLOTS_SMALL_MIN) { + return TCACHE_NSLOTS_SMALL_MIN; + } else if (candidate <= TCACHE_NSLOTS_SMALL_MAX) { + return candidate; + } else { + return TCACHE_NSLOTS_SMALL_MAX; + } +} + bool tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. */ @@ -801,23 +842,12 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { if (tcache_bin_info == NULL) { return true; } - unsigned i, ncached_max; - for (i = 0; i < SC_NBINS; i++) { - if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { - ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((bin_infos[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - ncached_max = bin_infos[i].nregs << 1; - } else { - ncached_max = TCACHE_NSLOTS_SMALL_MAX; - } + for (szind_t i = 0; i < nhbins; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } - for (; i < nhbins; i++) { - cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE); - } - cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size, - &tcache_bin_alloc_alignment); + cache_bin_info_compute_alloc(tcache_bin_info, nhbins, + &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); return false; } -- cgit v0.12 From b58dea8d1b6894eed1616a1264bb9c893194f770 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 14:19:37 -0700 Subject: Cache bin: expose ncached_max publicly. 
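
As a worked instance of the new bound (assuming a 64-bit target with 8-byte
pointers; cache_bin_sz_t is uint16_t):

    CACHE_BIN_NCACHED_MAX = ((size_t)1 << (sizeof(uint16_t) * 8))
                                / sizeof(void *) - 1
                          = 65536 / 8 - 1
                          = 8191

so no individual cache bin can be sized to hold more than 8191 pointers on
such a target.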
--- include/jemalloc/internal/cache_bin.h | 10 ++++++++++ src/cache_bin.c | 1 + 2 files changed, 11 insertions(+) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a56b4a1..c016769 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -21,6 +21,16 @@ typedef uint16_t cache_bin_sz_t; /* + * That implies the following value, for the maximum number of items in any + * individual bin. The cache bins track their bounds looking just at the low + * bits of a pointer, compared against a cache_bin_sz_t. So that's + * 1 << (sizeof(cache_bin_sz_t) * 8) + * bytes spread across pointer sized objects to get the maximum. + */ +#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \ + / sizeof(void *) - 1) + +/* * This lives inside the cache_bin (for locality reasons), and is initialized * alongside it, but is otherwise not modified by any cache bin operations. * It's logically public and maintained by its callers. diff --git a/src/cache_bin.c b/src/cache_bin.c index 51b8749..1e26c4e 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -6,6 +6,7 @@ void cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; -- cgit v0.12 From 181093173d589569a846f2d5d4c9e8ca8fd57b5d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:03:06 -0700 Subject: Tcache: make slot sizing configurable. --- include/jemalloc/internal/tcache_externs.h | 8 +++-- include/jemalloc/internal/tcache_types.h | 17 ----------- src/jemalloc.c | 10 ++++++ src/tcache.c | 49 ++++++++++++++++++++++++------ test/unit/cache_bin.c | 5 +-- 5 files changed, 59 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 67fdc00..e043ef4 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,9 +1,13 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; +extern unsigned opt_tcache_nslots_small_min; +extern unsigned opt_tcache_nslots_small_max; +extern unsigned opt_tcache_nslots_large; +extern ssize_t opt_lg_tcache_shift; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index cba86f4..34a0599 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,23 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per slab for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. 
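
A rough worked example with the default settings (the per-slab region
counts below are hypothetical, chosen only to exercise each branch of
tcache_ncached_max_compute(); opt_lg_tcache_nslots_mul keeps its default
of -1, i.e. half the regions in a slab):

    small class, 512 regions per slab: 512 >> 1 = 256, which exceeds
        opt_tcache_nslots_small_max (200), so the bin gets 200 slots;
    small class, 30 regions per slab: 30 >> 1 = 15, rounded up to 16 to
        stay even, then raised to opt_tcache_nslots_small_min (20);
    any large class: opt_tcache_nslots_large (20) slots.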
*/ -#define TCACHE_NSLOTS_LARGE 20 - /* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 diff --git a/src/jemalloc.c b/src/jemalloc.c index fbec733..4f911e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1379,6 +1379,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], */ CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, "lg_tcache_nslots_mul", -16, 16) + /* Ditto with values past 2048. */ + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, + "tcache_nslots_small_min", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, + "tcache_nslots_small_max", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, + "tcache_nslots_large", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index a18d91d..9586556 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -10,8 +10,13 @@ /******************************************************************************/ /* Data. */ -bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; +bool opt_tcache = true; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +/* Reasonable defaults for min and max values. */ +unsigned opt_tcache_nslots_small_min = 20; +unsigned opt_tcache_nslots_small_max = 200; +unsigned opt_tcache_nslots_large = 20; /* * We attempt to make the number of slots in a tcache bin for a given size class @@ -19,7 +24,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; * the multiplier is 1/2 (i.e. we set the maximum number of objects in the * tcache to half the number of objects in a slab). * This is bounded by some other constraints as well, like the fact that it - * must be even, must be less than TCACHE_NSLOTS_SMALL_MAX, etc.. + * must be even, must be less than opt_tcache_nslots_small_max, etc.. */ ssize_t opt_lg_tcache_nslots_mul = -1; @@ -485,7 +490,6 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); size_t cur_offset = 0; @@ -792,10 +796,37 @@ static unsigned tcache_ncached_max_compute(szind_t szind) { if (szind >= SC_NBINS) { assert(szind < nhbins); - return TCACHE_NSLOTS_LARGE; + return opt_tcache_nslots_large; } unsigned slab_nregs = bin_infos[szind].nregs; + /* We may modify these values; start with the opt versions. */ + unsigned nslots_small_min = opt_tcache_nslots_small_min; + unsigned nslots_small_max = opt_tcache_nslots_small_max; + + /* + * Clamp values to meet our constraints -- even, nonzero, min < max, and + * suitable for a cache bin size. 
+ */ + if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { + nslots_small_max = CACHE_BIN_NCACHED_MAX; + } + if (nslots_small_min % 2 != 0) { + nslots_small_min++; + } + if (nslots_small_max % 2 != 0) { + nslots_small_max--; + } + if (nslots_small_min < 2) { + nslots_small_min = 2; + } + if (nslots_small_max < 2) { + nslots_small_max = 2; + } + if (nslots_small_min > nslots_small_max) { + nslots_small_min = nslots_small_max; + } + unsigned candidate; if (opt_lg_tcache_nslots_mul < 0) { candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); @@ -810,12 +841,12 @@ tcache_ncached_max_compute(szind_t szind) { */ ++candidate; } - if (candidate <= TCACHE_NSLOTS_SMALL_MIN) { - return TCACHE_NSLOTS_SMALL_MIN; - } else if (candidate <= TCACHE_NSLOTS_SMALL_MAX) { + if (candidate <= nslots_small_min) { + return nslots_small_min; + } else if (candidate <= nslots_small_max) { return candidate; } else { - return TCACHE_NSLOTS_SMALL_MAX; + return nslots_small_max; } } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index cbd8ce0..43fe8c6 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -53,12 +53,13 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } TEST_BEGIN(test_cache_bin) { + const int ncached_max = 100; bool success; void *ptr; cache_bin_t bin; cache_bin_info_t info; - cache_bin_info_init(&info, TCACHE_NSLOTS_SMALL_MAX); + cache_bin_info_init(&info, ncached_max); size_t size; size_t alignment; @@ -74,7 +75,7 @@ TEST_BEGIN(test_cache_bin) { assert_zu_eq(cur_offset, size, "Should use all requested memory"); /* Initialize to empty; should then have 0 elements. */ - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&info); + expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); -- cgit v0.12 From 10b96f635190cd8e27ed73f6b44293a7357e4013 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:27:50 -0700 Subject: Tcache: Remove some unused gc constants. --- include/jemalloc/internal/tcache_types.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 34a0599..0806df9 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -20,17 +20,6 @@ typedef struct tcaches_s tcaches_t; /* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) - /* Number of allocation bytes between tcache incremental GCs. */ #define TCACHE_GC_INCR_BYTES 65536U -- cgit v0.12 From ec0b5795639fe96883366691e0380eeb0845836b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:33:23 -0700 Subject: Tcache: Privatize opt_lg_tcache_max default. 
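The value itself is unchanged: (1U << opt_lg_tcache_max) still feeds
tcache_maxclass, and with the default of 15 that is 32768 bytes (32 KiB).
A trivial standalone check (assuming only the default kept in tcache.c below):

    #include <stdio.h>

    int main(void) {
        long lg_tcache_max = 15; /* default retained in tcache.c */
        /* Prints: tcache max size class: 32768 bytes (32 KiB) */
        printf("tcache max size class: %ld bytes (%ld KiB)\n",
            1L << lg_tcache_max, (1L << lg_tcache_max) >> 10);
        return 0;
    }
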
--- include/jemalloc/internal/tcache_types.h | 3 --- src/tcache.c | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 0806df9..c8fd4c3 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,9 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - /* Number of allocation bytes between tcache incremental GCs. */ #define TCACHE_GC_INCR_BYTES 65536U diff --git a/src/tcache.c b/src/tcache.c index 9586556..0366149 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,7 +11,12 @@ /* Data. */ bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +/* + * (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. This choice + * (32kb by default) works well as a default in practice. + */ +ssize_t opt_lg_tcache_max = 15; /* Reasonable defaults for min and max values. */ unsigned opt_tcache_nslots_small_min = 20; -- cgit v0.12 From d338dd45d7402df287adb10e82ca98be831ac16b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:53:30 -0700 Subject: Tcache: Make incremental gc bytes configurable. --- include/jemalloc/internal/tcache_externs.h | 1 + include/jemalloc/internal/tcache_types.h | 3 --- include/jemalloc/internal/thread_event.h | 4 ++-- src/jemalloc.c | 4 ++++ src/tcache.c | 10 ++++++++-- test/stress/fill_flush.c | 1 - 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index e043ef4..1924fd9 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -8,6 +8,7 @@ extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index c8fd4c3..fb311e7 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,9 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* Number of allocation bytes between tcache incremental GCs. */ -#define TCACHE_GC_INCR_BYTES 65536U - /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ #define TCACHE_ZERO_INITIALIZER {0} #define TCACHE_SLOW_ZERO_INITIALIZER {0} diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 5b5bb9f..2fcaa88 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -53,10 +53,10 @@ void tsd_te_init(tsd_t *tsd); * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0), true) \ + E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ E(prof_sample, (config_prof && opt_prof), true) \ E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (TCACHE_GC_INCR_BYTES > 0), false) + E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) #define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4f911e2..068a840 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1389,6 +1389,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, + "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index 0366149..9b4a7b7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -33,6 +33,12 @@ unsigned opt_tcache_nslots_large = 20; */ ssize_t opt_lg_tcache_nslots_mul = -1; +/* + * Number of allocation bytes between tcache incremental GCs. Again, this + * default just seems to work well; more tuning is possible. + */ +size_t opt_tcache_gc_incr_bytes = 65536; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ @@ -62,7 +68,7 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { uint64_t tcache_gc_new_event_wait(tsd_t *tsd) { - return TCACHE_GC_INCR_BYTES; + return opt_tcache_gc_incr_bytes; } uint64_t @@ -72,7 +78,7 @@ tcache_gc_postponed_event_wait(tsd_t *tsd) { uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { - return TCACHE_GC_INCR_BYTES; + return opt_tcache_gc_incr_bytes; } uint64_t diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index 6ea3ff9..a2db044 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -69,7 +69,6 @@ TEST_BEGIN(test_array_vs_item_large) { } TEST_END - int main(void) { return test_no_reentrancy( test_array_vs_item_small, -- cgit v0.12 From ee72bf1cfd236d6e076d9d9bdfcb09787016d62b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 16:24:17 -0700 Subject: Tcache: Add tcache gc delay option. This can reduce flushing frequency for small size classes. --- include/jemalloc/internal/tcache_externs.h | 1 + include/jemalloc/internal/tcache_structs.h | 5 ++ src/jemalloc.c | 4 + src/tcache.c | 116 ++++++++++++++++++++++------- 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 1924fd9..1ee6319 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -9,6 +9,7 @@ extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; extern size_t opt_tcache_gc_incr_bytes; +extern size_t opt_tcache_gc_delay_bytes; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 1c9d4db..331bd24 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -37,6 +37,11 @@ struct tcache_slow_s { /* For small bins, whether has been refilled since last GC. */ bool bin_refilled[SC_NBINS]; /* + * For small bins, the number of items we can pretend to flush before + * actually flushing. + */ + uint8_t bin_flush_delay_items[SC_NBINS]; + /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this * tcache_slow_t and its associated tcache_t. diff --git a/src/jemalloc.c b/src/jemalloc.c index 068a840..2903a41 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1393,6 +1393,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, + "tcache_gc_delay_bytes", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index 9b4a7b7..363a5b3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -34,6 +34,20 @@ unsigned opt_tcache_nslots_large = 20; ssize_t opt_lg_tcache_nslots_mul = -1; /* + * With default settings, we may end up flushing small bins frequently with + * small flush amounts. To limit this tendency, we can set a number of bytes to + * "delay" by. If we try to flush N M-byte items, we decrease that size-class's + * delay by N * M. So, if delay is 1024 and we're looking at the 64-byte size + * class, we won't do any flushing until we've been asked to flush 1024/64 == 16 + * items. This can happen in any configuration (i.e. being asked to flush 16 + * items once, or 4 items 4 times). + * + * Practically, this is stored as a count of items in a uint8_t, so the + * effective maximum value for a size class is 255 * sz. + */ +size_t opt_tcache_gc_delay_bytes = 0; + +/* * Number of allocation bytes between tcache incremental GCs. Again, this * default just seems to work well; more tuning is possible. */ @@ -86,6 +100,67 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } +static uint8_t +tcache_gc_item_delay_compute(szind_t szind) { + assert(szind < SC_NBINS); + size_t sz = sz_index2size(szind); + size_t item_delay = opt_tcache_gc_delay_bytes / sz; + size_t delay_max = ZU(1) + << (sizeof(((tcache_slow_t *)NULL)->bin_flush_delay_items[0]) * 8); + if (item_delay >= delay_max) { + item_delay = delay_max - 1; + } + return item_delay; +} + +static void +tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + szind_t szind) { + /* Aim to flush 3/4 of items below low-water. 
*/ + assert(szind < SC_NBINS); + + cache_bin_t *cache_bin = &tcache->bins[szind]; + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + &tcache_bin_info[szind]); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, + &tcache_bin_info[szind]); + assert(!tcache_slow->bin_refilled[szind]); + + size_t nflush = low_water - (low_water >> 2); + if (nflush < tcache_slow->bin_flush_delay_items[szind]) { + tcache_slow->bin_flush_delay_items[szind] -= nflush; + return; + } else { + tcache_slow->bin_flush_delay_items[szind] + = tcache_gc_item_delay_compute(szind); + } + + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, ncached - nflush); + + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + if ((cache_bin_info_ncached_max(&tcache_bin_info[szind]) + >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { + tcache_slow->lg_fill_div[szind]++; + } +} + +static void +tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + szind_t szind) { + /* Like the small GC; flush 3/4 of untouched items. */ + assert(szind >= SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[szind]; + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + &tcache_bin_info[szind]); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, + &tcache_bin_info[szind]); + tcache_bin_flush_large(tsd, tcache, cache_bin, szind, + ncached - low_water + (low_water >> 2)); +} + static void tcache_event(tsd_t *tsd) { tcache_t *tcache = tcache_get(tsd); @@ -94,45 +169,28 @@ tcache_event(tsd_t *tsd) { } tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - szind_t binind = tcache_slow->next_gc_bin; - bool is_small = (binind < SC_NBINS); - cache_bin_t *cache_bin = &tcache->bins[binind]; + szind_t szind = tcache_slow->next_gc_bin; + bool is_small = (szind < SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[binind]); - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, - &tcache_bin_info[binind]); + &tcache_bin_info[szind]); if (low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low water mark. - */ if (is_small) { - assert(!tcache_slow->bin_refilled[binind]); - tcache_bin_flush_small(tsd, tcache, cache_bin, binind, - ncached - low_water + (low_water >> 2)); - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> - (tcache_slow->lg_fill_div[binind] + 1)) >= 1) { - tcache_slow->lg_fill_div[binind]++; - } + tcache_gc_small(tsd, tcache_slow, tcache, szind); } else { - tcache_bin_flush_large(tsd, tcache, cache_bin, binind, - ncached - low_water + (low_water >> 2)); + tcache_gc_large(tsd, tcache_slow, tcache, szind); } - } else if (is_small && tcache_slow->bin_refilled[binind]) { + } else if (is_small && tcache_slow->bin_refilled[szind]) { assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. */ - if (tcache_slow->lg_fill_div[binind] > 1) { - tcache_slow->lg_fill_div[binind]--; + if (tcache_slow->lg_fill_div[szind] > 1) { + tcache_slow->lg_fill_div[szind]--; } - tcache_slow->bin_refilled[binind] = false; + tcache_slow->bin_refilled[szind] = false; } cache_bin_low_water_set(cache_bin); @@ -519,6 +577,10 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, &cur_offset); /* Sanity check that the whole stack is used. 
*/ assert(cur_offset == tcache_bin_alloc_size); + for (unsigned i = 0; i < SC_NBINS; i++) { + tcache_slow->bin_flush_delay_items[i] + = tcache_gc_item_delay_compute(i); + } } /* Initialize auto tcache (embedded in TSD). */ -- cgit v0.12 From 7503b5b33a9ea446c30e3c51f6ad68660fa6e931 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 10:36:27 -0700 Subject: Stats, CTL: Expose new tcache settings. --- src/ctl.c | 28 +++++++++++++++++++++++++--- src/stats.c | 6 ++++++ src/tcache.c | 12 ++++++------ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index c3c029f..c5964d8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,9 +103,15 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_tcache_nslots_small_min) +CTL_PROTO(opt_tcache_nslots_small_max) +CTL_PROTO(opt_tcache_nslots_large) +CTL_PROTO(opt_lg_tcache_nslots_mul) +CTL_PROTO(opt_tcache_gc_incr_bytes) +CTL_PROTO(opt_tcache_gc_delay_bytes) CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) -CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -340,9 +346,17 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("tcache_nslots_small_min"), + CTL(opt_tcache_nslots_small_min)}, + {NAME("tcache_nslots_small_max"), + CTL(opt_tcache_nslots_small_max)}, + {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, + {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, + {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, + {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1793,10 +1807,18 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, + unsigned) +CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, + unsigned) +CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) +CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) +CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) +CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) -CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/stats.c b/src/stats.c index 42e4a1c..8be69ca 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1107,6 +1107,12 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("tcache") OPT_WRITE_SSIZE_T("lg_tcache_max") + 
OPT_WRITE_UNSIGNED("tcache_nslots_small_min") + OPT_WRITE_UNSIGNED("tcache_nslots_small_max") + OPT_WRITE_UNSIGNED("tcache_nslots_large") + OPT_WRITE_SSIZE_T("lg_tcache_nslots_mul") + OPT_WRITE_SIZE_T("tcache_gc_incr_bytes") + OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/src/tcache.c b/src/tcache.c index 363a5b3..c9cb785 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -34,6 +34,12 @@ unsigned opt_tcache_nslots_large = 20; ssize_t opt_lg_tcache_nslots_mul = -1; /* + * Number of allocation bytes between tcache incremental GCs. Again, this + * default just seems to work well; more tuning is possible. + */ +size_t opt_tcache_gc_incr_bytes = 65536; + +/* * With default settings, we may end up flushing small bins frequently with * small flush amounts. To limit this tendency, we can set a number of bytes to * "delay" by. If we try to flush N M-byte items, we decrease that size-class's @@ -47,12 +53,6 @@ ssize_t opt_lg_tcache_nslots_mul = -1; */ size_t opt_tcache_gc_delay_bytes = 0; -/* - * Number of allocation bytes between tcache incremental GCs. Again, this - * default just seems to work well; more tuning is possible. - */ -size_t opt_tcache_gc_incr_bytes = 65536; - cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ -- cgit v0.12 From 6cdac3c573de86c8d59d69fca8f1778bdbec25e0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 15:32:18 -0700 Subject: Tcache: Make flush fractions configurable. --- include/jemalloc/internal/tcache_externs.h | 2 ++ include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/ctl.c | 10 ++++++++++ src/jemalloc.c | 6 ++++++ src/stats.c | 2 ++ src/tcache.c | 7 +++++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 1ee6319..f044d32 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -10,6 +10,8 @@ extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; extern size_t opt_tcache_gc_incr_bytes; extern size_t opt_tcache_gc_delay_bytes; +extern unsigned opt_lg_tcache_flush_small_div; +extern unsigned opt_lg_tcache_flush_large_div; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5d49c4e..1cba918 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -110,7 +110,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> 1; + &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -128,7 +128,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> 1; + &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/ctl.c b/src/ctl.c index c5964d8..be8be10 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,8 @@ CTL_PROTO(opt_tcache_nslots_large) CTL_PROTO(opt_lg_tcache_nslots_mul) CTL_PROTO(opt_tcache_gc_incr_bytes) CTL_PROTO(opt_tcache_gc_delay_bytes) +CTL_PROTO(opt_lg_tcache_flush_small_div) +CTL_PROTO(opt_lg_tcache_flush_large_div) CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_prof) @@ -355,6 +357,10 @@ static const ctl_named_node_t opt_node[] = { {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, + {NAME("lg_tcache_flush_small_div"), + CTL(opt_lg_tcache_flush_small_div)}, + {NAME("lg_tcache_flush_large_div"), + CTL(opt_lg_tcache_flush_large_div)}, {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("prof"), CTL(opt_prof)}, @@ -1816,6 +1822,10 @@ CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) +CTL_RO_NL_GEN(opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, + unsigned) +CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, + unsigned) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2903a41..74355d4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1397,6 +1397,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "tcache_gc_delay_bytes", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, + "lg_tcache_flush_small_div", 1, 16, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, + "lg_tcache_flush_large_div", 1, 16, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/stats.c b/src/stats.c index 8be69ca..fb88e5a 100644 --- a/src/stats.c +++ b/src/stats.c @@ 
-1113,6 +1113,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SSIZE_T("lg_tcache_nslots_mul") OPT_WRITE_SIZE_T("tcache_gc_incr_bytes") OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") + OPT_WRITE_UNSIGNED("lg_tcache_flush_small_div") + OPT_WRITE_UNSIGNED("lg_tcache_flush_large_div") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/src/tcache.c b/src/tcache.c index c9cb785..2513ca3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -53,6 +53,13 @@ size_t opt_tcache_gc_incr_bytes = 65536; */ size_t opt_tcache_gc_delay_bytes = 0; +/* + * When a cache bin is flushed because it's full, how much of it do we flush? + * By default, we flush half the maximum number of items. + */ +unsigned opt_lg_tcache_flush_small_div = 1; +unsigned opt_lg_tcache_flush_large_div = 1; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ -- cgit v0.12 From cd28e60337d3e4ef183f407df734f0095a3c1352 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 17:29:25 -0700 Subject: Don't warn on uniform initialization. --- configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.ac b/configure.ac index 98cb4bc..787ef1b 100644 --- a/configure.ac +++ b/configure.ac @@ -256,6 +256,8 @@ if test "x$GCC" = "xyes" ; then dnl has lots of nested structs). See the discussion at. dnl https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 JE_CFLAGS_ADD([-Wno-missing-braces]) + dnl This one too. + JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then -- cgit v0.12 From 8da0896b7913470250a0220504822028e2aa8f2a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 17:43:23 -0700 Subject: Tcache: Make an integer conversion explicit. --- src/tcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 2513ca3..ff42884 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,7 +142,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, = tcache_gc_item_delay_compute(szind); } - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, ncached - nflush); + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, + (unsigned)(ncached - nflush)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that @@ -165,7 +166,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, - ncached - low_water + (low_water >> 2)); + (unsigned)(ncached - low_water + (low_water >> 2))); } static void -- cgit v0.12 From 035be448674b852637f04d86bd85d04b672d71b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 15:41:20 -0700 Subject: Separate out dumping for each prof recent record --- src/prof_recent.c | 78 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index cd72bda..22ce473 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -431,7 +431,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { } static void -dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { +prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { char bt_buf[2 * sizeof(intptr_t) + 3]; char *s = bt_buf; assert(tctx != NULL); @@ -442,6 +442,43 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { } } +static void +prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "size", emitter_type_size, &node->size); + size_t usize = sz_s2u(node->size); + emitter_json_kv(emitter, "usize", emitter_type_size, &usize); + bool released = node->alloc_edata == NULL; + emitter_json_kv(emitter, "released", emitter_type_bool, &released); + + emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, + &node->alloc_tctx->thr_uid); + uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); + emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, + &alloc_time_ns); + emitter_json_array_kv_begin(emitter, "alloc_trace"); + prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); + emitter_json_array_end(emitter); + + if (node->dalloc_tctx != NULL) { + assert(released); + emitter_json_kv(emitter, "dalloc_thread_uid", + emitter_type_uint64, &node->dalloc_tctx->thr_uid); + assert(!nstime_equals_zero(&node->dalloc_time)); + uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time); + emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64, + &dalloc_time_ns); + emitter_json_array_kv_begin(emitter, "dalloc_trace"); + prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx); + emitter_json_array_end(emitter); + } else { + assert(nstime_equals_zero(&node->dalloc_time)); + } + + emitter_json_object_end(emitter); +} + #define PROF_RECENT_PRINT_BUFSIZE 4096 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { @@ -465,42 +502,9 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); - prof_recent_t *n; - ql_foreach(n, &prof_recent_alloc_list, link) { - emitter_json_object_begin(&emitter); - - emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); - size_t usize = sz_s2u(n->size); - emitter_json_kv(&emitter, "usize", emitter_type_size, &usize); - bool released = n->alloc_edata == NULL; - emitter_json_kv(&emitter, "released", emitter_type_bool, - &released); - - emitter_json_kv(&emitter, "alloc_thread_uid", - emitter_type_uint64, &n->alloc_tctx->thr_uid); - uint64_t alloc_time_ns = nstime_ns(&n->alloc_time); - emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64, - 
&alloc_time_ns); - emitter_json_array_kv_begin(&emitter, "alloc_trace"); - dump_bt(&emitter, n->alloc_tctx); - emitter_json_array_end(&emitter); - - if (n->dalloc_tctx != NULL) { - assert(released); - emitter_json_kv(&emitter, "dalloc_thread_uid", - emitter_type_uint64, &n->dalloc_tctx->thr_uid); - assert(!nstime_equals_zero(&n->dalloc_time)); - uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time); - emitter_json_kv(&emitter, "dalloc_time", - emitter_type_uint64, &dalloc_time_ns); - emitter_json_array_kv_begin(&emitter, "dalloc_trace"); - dump_bt(&emitter, n->dalloc_tctx); - emitter_json_array_end(&emitter); - } else { - assert(nstime_equals_zero(&n->dalloc_time)); - } - - emitter_json_object_end(&emitter); + prof_recent_t *node; + ql_foreach(node, &prof_recent_alloc_list, link) { + prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); -- cgit v0.12 From 730658f72fd8b7eafabdb50ba83a4d04aa7afbb5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 15:54:40 -0700 Subject: Extract alloc/dalloc utility for last-N nodes --- src/prof_recent.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 22ce473..5292c21 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -43,6 +43,20 @@ prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) { return old_max; } +static prof_recent_t * +prof_recent_allocate_node(tsdn_t *tsdn) { + return (prof_recent_t *)iallocztm(tsdn, sizeof(prof_recent_t), + sz_size2index(sizeof(prof_recent_t)), false, NULL, true, + arena_get(tsdn, 0, false), true); +} + +static void +prof_recent_free_node(tsdn_t *tsdn, prof_recent_t *node) { + assert(node != NULL); + assert(isalloc(tsdn, node) == sz_s2u(sizeof(prof_recent_t))); + idalloctm(tsdn, node, NULL, NULL, true, true); +} + static inline void increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); @@ -277,10 +291,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { assert(prof_recent_alloc_max_get(tsd) != 0); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd), - sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)), - false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), - true); + reserve = prof_recent_allocate_node(tsd_tsdn(tsd)); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); } @@ -331,7 +342,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); if (reserve != NULL) { - idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), reserve); } /* @@ -353,7 +364,7 @@ label_rollback: prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); if (reserve != NULL) { - idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), reserve); } decrement_recent_count(tsd, tctx); } @@ -422,7 +433,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } - idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), node); --count; } while (!ql_empty(&old_list)); assert(count == 0); -- cgit v0.12 From b8bdea6b26509b3fd06bb9b3344fca7b2f22dee9 Mon Sep 17 00:00:00 2001 
From: Yinan Zhang Date: Fri, 10 Apr 2020 16:02:39 -0700 Subject: Fix: prof_recent_alloc_max_ctl_read() does not take tsd --- test/unit/prof_recent.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 4aa9f9e..d7dd352 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -7,7 +7,7 @@ /* Invariant before and after every test (when config_prof is on) */ static void -confirm_prof_setup(tsd_t *tsd) { +confirm_prof_setup() { /* Options */ assert_true(opt_prof, "opt_prof not on"); assert_true(opt_prof_active, "opt_prof_active not on"); @@ -16,13 +16,13 @@ confirm_prof_setup(tsd_t *tsd) { /* Dynamics */ assert_true(prof_active, "prof_active not on"); - assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + assert_zd_eq(prof_recent_alloc_max_ctl_read(), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } TEST_BEGIN(test_confirm_setup) { test_skip_if(!config_prof); - confirm_prof_setup(tsd_fetch()); + confirm_prof_setup(); } TEST_END @@ -58,13 +58,11 @@ TEST_BEGIN(test_prof_recent_on) { ssize_t past, future; size_t len = sizeof(ssize_t); - tsd_t *tsd = tsd_fetch(); - - confirm_prof_setup(tsd); + confirm_prof_setup(); assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); - confirm_prof_setup(tsd); + confirm_prof_setup(); assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); @@ -93,7 +91,7 @@ TEST_BEGIN(test_prof_recent_on) { expect_zd_eq(past, -1, "Output should not be touched given invalid write"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -151,9 +149,7 @@ TEST_BEGIN(test_prof_recent_alloc) { prof_recent_t *n; ssize_t future; - tsd_t *tsd = tsd_fetch(); - - confirm_prof_setup(tsd); + confirm_prof_setup(); /* * First batch of 2 * OPT_ALLOC_MAX allocations. After the @@ -190,7 +186,7 @@ TEST_BEGIN(test_prof_recent_alloc) { free(p); } - confirm_prof_setup(tsd); + confirm_prof_setup(); b = false; assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, @@ -219,7 +215,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning on prof_active failed"); - confirm_prof_setup(tsd); + confirm_prof_setup(); /* * Third batch of OPT_ALLOC_MAX allocations. 
Since prof_active is @@ -338,7 +334,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -485,8 +481,7 @@ confirm_record(const char *template, TEST_BEGIN(test_prof_recent_alloc_dump) { test_skip_if(!config_prof); - tsd_t *tsd = tsd_fetch(); - confirm_prof_setup(tsd); + confirm_prof_setup(); ssize_t future; void *p, *q; @@ -531,7 +526,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { future = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -588,7 +583,7 @@ f_thread(void *arg) { } else if (rand % 5 == 0) { prof_recent_alloc_dump(tsd, test_write_cb, NULL); } else if (rand % 5 == 1) { - last_max = prof_recent_alloc_max_ctl_read(tsd); + last_max = prof_recent_alloc_max_ctl_read(); } else if (rand % 5 == 2) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max * 2); @@ -613,8 +608,7 @@ f_thread(void *arg) { TEST_BEGIN(test_prof_recent_stress) { test_skip_if(!config_prof); - tsd_t *tsd = tsd_fetch(); - confirm_prof_setup(tsd); + confirm_prof_setup(); test_max = OPT_ALLOC_MAX; for (size_t i = 0; i < N_THREADS; i++) { @@ -643,7 +637,7 @@ TEST_BEGIN(test_prof_recent_stress) { test_max = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END -- cgit v0.12 From 857ebd3daf71963e522cdbc51725ad33b7368186 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 16:26:55 -0700 Subject: Make edata pointer on prof recent record an atomic fence --- include/jemalloc/internal/prof_recent.h | 1 + include/jemalloc/internal/prof_structs.h | 2 +- src/prof_recent.c | 49 +++++++++++++++++++++++--------- test/unit/prof_recent.c | 9 +++--- 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index bd04652..defc5fb 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -9,6 +9,7 @@ void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; +edata_t *prof_recent_alloc_edata_get_no_lock(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); #endif diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 73ef8fc..26942aa 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -205,8 +205,8 @@ struct prof_recent_s { ql_elm(prof_recent_t) link; size_t size; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ prof_tctx_t *alloc_tctx; - edata_t *alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *dalloc_tctx; }; diff --git a/src/prof_recent.c b/src/prof_recent.c index 5292c21..37fb01d 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -102,6 +102,26 @@ decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { prof_tctx_try_destroy(tsd, tctx); } +#ifndef JEMALLOC_JET +static inline +#endif +edata_t * +prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { + return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE); +} + +static inline edata_t * +prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return prof_recent_alloc_edata_get_no_lock(n); +} + +static void +prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + atomic_store_p(&n->alloc_edata, edata, ATOMIC_RELEASE); +} + void edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); @@ -120,7 +140,8 @@ edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = edata_prof_recent_alloc_get_no_lock(edata); - assert(recent_alloc == NULL || recent_alloc->alloc_edata == edata); + assert(recent_alloc == NULL || + prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); return recent_alloc; } @@ -137,22 +158,24 @@ edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, static void edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc); assert(old_recent_alloc == NULL); - recent_alloc->alloc_edata = edata; + prof_recent_alloc_edata_set(tsd, recent_alloc, edata); } static void edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_update_internal(tsd, edata, NULL); assert(old_recent_alloc == recent_alloc); - assert(edata == recent_alloc->alloc_edata); - recent_alloc->alloc_edata = NULL; + assert(edata == prof_recent_alloc_edata_get(tsd, recent_alloc)); + prof_recent_alloc_edata_set(tsd, recent_alloc, NULL); } /* @@ -191,7 +214,6 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { /* Check again after acquiring the lock. 
*/ prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata); if (recent != NULL) { - edata_prof_recent_alloc_reset(tsd, edata, recent); assert(nstime_equals_zero(&recent->dalloc_time)); assert(recent->dalloc_tctx == NULL); if (dalloc_tctx != NULL) { @@ -199,6 +221,7 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { recent->dalloc_tctx = dalloc_tctx; dalloc_tctx = NULL; } + edata_prof_recent_alloc_reset(tsd, edata, recent); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -209,10 +232,11 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { } static void -prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { +prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (recent->alloc_edata != NULL) { - edata_prof_recent_alloc_reset(tsd, recent->alloc_edata, recent); + edata_t *edata = prof_recent_alloc_edata_get(tsd, recent_alloc); + if (edata != NULL) { + edata_prof_recent_alloc_reset(tsd, edata, recent_alloc); } } @@ -333,9 +357,9 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { tail->size = size; nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); tail->alloc_tctx = tctx; - edata_prof_recent_alloc_set(tsd, edata, tail); nstime_init_zero(&tail->dalloc_time); tail->dalloc_tctx = NULL; + edata_prof_recent_alloc_set(tsd, edata, tail); assert(!prof_recent_alloc_is_empty(tsd)); prof_recent_alloc_assert_count(tsd); @@ -460,7 +484,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_kv(emitter, "size", emitter_type_size, &node->size); size_t usize = sz_s2u(node->size); emitter_json_kv(emitter, "usize", emitter_type_size, &usize); - bool released = node->alloc_edata == NULL; + bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL; emitter_json_kv(emitter, "released", emitter_type_bool, &released); emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, @@ -472,8 +496,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); emitter_json_array_end(emitter); - if (node->dalloc_tctx != NULL) { - assert(released); + if (released && node->dalloc_tctx != NULL) { emitter_json_kv(emitter, "dalloc_thread_uid", emitter_type_uint64, &node->dalloc_tctx->thr_uid); assert(!nstime_equals_zero(&node->dalloc_time)); @@ -483,8 +506,6 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_array_kv_begin(emitter, "dalloc_trace"); prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx); emitter_json_array_end(emitter); - } else { - assert(nstime_equals_zero(&node->dalloc_time)); } emitter_json_object_end(emitter); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index d7dd352..791cc4f 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -107,7 +107,7 @@ confirm_malloc(void *p) { assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_eq(e, n->alloc_edata, + expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -122,9 +122,10 @@ static void confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_not_null(n->alloc_edata, + edata_t *edata = prof_recent_alloc_edata_get_no_lock(n); + 
assert_ptr_not_null(edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -133,7 +134,7 @@ static void confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_null(n->alloc_edata, + expect_ptr_null(prof_recent_alloc_edata_get_no_lock(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); -- cgit v0.12 From 264d89d6415be31ee00dd3dd2460140f46cea2e9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:48:01 -0700 Subject: Extract restore and async cleanup functions for prof last-N list --- src/prof_recent.c | 65 +++++++++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 37fb01d..fd63d50 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -399,23 +399,17 @@ prof_recent_alloc_max_ctl_read() { return prof_recent_alloc_max_get_no_lock(); } -ssize_t -prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { - assert(max >= -1); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_alloc_assert_count(tsd); - - const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); - +static void +prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + ssize_t max = prof_recent_alloc_max_get(tsd); if (max == -1 || prof_recent_alloc_count <= max) { /* Easy case - no need to alter the list. */ - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return old_max; + ql_new(to_delete); + prof_recent_alloc_assert_count(tsd); + return; } - /* For verification purpose only. */ - ssize_t count = prof_recent_alloc_count - max; prof_recent_t *node; ql_foreach(node, &prof_recent_alloc_list, link) { if (prof_recent_alloc_count == max) { @@ -426,42 +420,41 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { } assert(prof_recent_alloc_count == max); - prof_recent_list_t old_list; - ql_move(&old_list, &prof_recent_alloc_list); + ql_move(to_delete, &prof_recent_alloc_list); if (max == 0) { assert(node == NULL); } else { assert(node != NULL); - ql_split(&old_list, node, &prof_recent_alloc_list, link); + ql_split(to_delete, node, &prof_recent_alloc_list, link); } - assert(!ql_empty(&old_list)); - + assert(!ql_empty(to_delete)); prof_recent_alloc_assert_count(tsd); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +} - /* - * Asynchronously handle the tctx of the to-be-deleted nodes, so that - * there's no simultaneous holdings of prof_recent_alloc_mtx and - * tdata->lock. In the worst case there can be slightly extra space - * overhead taken by these nodes, but the total number of nodes at any - * time is bounded by (max + sum(decreases)), where "max" means the - * most recent prof_recent_alloc_max and "sum(decreases)" means the - * sum of the deltas of all decreases in prof_recent_alloc_max in the - * past. This (max + sum(decreases)) value is completely transparent - * to and controlled by application. 
- */ - do { - node = ql_first(&old_list); - ql_remove(&old_list, node, link); +static void +prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + while (!ql_empty(to_delete)) { + prof_recent_t *node = ql_first(to_delete); + ql_remove(to_delete, node, link); decrement_recent_count(tsd, node->alloc_tctx); if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } prof_recent_free_node(tsd_tsdn(tsd), node); - --count; - } while (!ql_empty(&old_list)); - assert(count == 0); + } +} +ssize_t +prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + assert(max >= -1); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + prof_recent_list_t to_delete; + prof_recent_alloc_restore_locked(tsd, &to_delete); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_async_cleanup(tsd, &to_delete); return old_max; } -- cgit v0.12 From fc8bc4b5c04501f17f7a3c3a5f3efafbf9b2a82e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:51:25 -0700 Subject: Increase dump buffer for prof last-N list --- src/prof_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index fd63d50..ab4ab8d 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -504,7 +504,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_object_end(emitter); } -#define PROF_RECENT_PRINT_BUFSIZE 4096 +#define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_t buf_writer; -- cgit v0.12 From a835d9cf85286cb0f05c644790df48461544c4d9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:54:03 -0700 Subject: Make prof last-N dumping non-blocking --- src/prof_recent.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index ab4ab8d..d0a83aa 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -513,31 +513,37 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_t emitter; emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); - emitter_begin(&emitter); + prof_recent_list_t temp_list; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); + ssize_t dump_max = prof_recent_alloc_max_get(tsd); + ql_move(&temp_list, &prof_recent_alloc_list); + ssize_t dump_count = prof_recent_alloc_count; + prof_recent_alloc_count = 0; + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - /* - * Set prof_recent_alloc_max to 0 so that dumping won't block sampled - * allocations: the allocations can complete but will not be recorded. 
- */ - ssize_t max = prof_recent_alloc_max_update(tsd, 0); - - emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); - + emitter_begin(&emitter); + emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, + &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); prof_recent_t *node; - ql_foreach(node, &prof_recent_alloc_list, link) { + ql_foreach(node, &temp_list, link) { prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); + emitter_end(&emitter); - max = prof_recent_alloc_max_update(tsd, max); - assert(max == 0); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + ql_concat(&temp_list, &prof_recent_alloc_list, link); + ql_move(&prof_recent_alloc_list, &temp_list); + prof_recent_alloc_count += dump_count; + prof_recent_alloc_restore_locked(tsd, &temp_list); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - emitter_end(&emitter); + prof_recent_alloc_async_cleanup(tsd, &temp_list); buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); } #undef PROF_RECENT_PRINT_BUFSIZE -- cgit v0.12 From 3e19ebd2ea5372c2f5932af6bb268ae8cb5df354 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 12:05:51 -0700 Subject: Add lock to protect prof last-N dumping --- include/jemalloc/internal/prof_recent.h | 2 ++ include/jemalloc/internal/witness.h | 27 ++++++++++++++------------- src/prof.c | 3 +++ src/prof_recent.c | 18 ++++++++++++++---- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index defc5fb..f97273c 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +extern malloc_mutex_t prof_recent_dump_mtx; + bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index b5fa1c0..58f7266 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -29,7 +29,8 @@ #define WITNESS_RANK_PROF_TDATA 8U #define WITNESS_RANK_PROF_LOG 9U #define WITNESS_RANK_PROF_GCTX 10U -#define WITNESS_RANK_BACKGROUND_THREAD 11U +#define WITNESS_RANK_PROF_RECENT_DUMP 11U +#define WITNESS_RANK_BACKGROUND_THREAD 12U /* * Used as an argument to witness_assert_depth_to_rank() in order to validate @@ -37,19 +38,19 @@ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this * definition can have the same value as the minimally ranked core lock. 
*/ -#define WITNESS_RANK_CORE 12U +#define WITNESS_RANK_CORE 13U -#define WITNESS_RANK_DECAY 12U -#define WITNESS_RANK_TCACHE_QL 13U -#define WITNESS_RANK_EXTENT_GROW 14U -#define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EDATA_CACHE 16U +#define WITNESS_RANK_DECAY 13U +#define WITNESS_RANK_TCACHE_QL 14U +#define WITNESS_RANK_EXTENT_GROW 15U +#define WITNESS_RANK_EXTENTS 16U +#define WITNESS_RANK_EDATA_CACHE 17U -#define WITNESS_RANK_EMAP 17U -#define WITNESS_RANK_RTREE 18U -#define WITNESS_RANK_BASE 19U -#define WITNESS_RANK_ARENA_LARGE 20U -#define WITNESS_RANK_HOOK 21U +#define WITNESS_RANK_EMAP 18U +#define WITNESS_RANK_RTREE 19U +#define WITNESS_RANK_BASE 20U +#define WITNESS_RANK_ARENA_LARGE 21U +#define WITNESS_RANK_HOOK 22U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF @@ -60,8 +61,8 @@ #define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF /******************************************************************************/ /* PER-WITNESS DATA */ diff --git a/src/prof.c b/src/prof.c index c8da81d..38a3db2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1116,6 +1116,7 @@ prof_prefork0(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_prefork(tsdn, &gctx_locks[i]); } + malloc_mutex_prefork(tsdn, &prof_recent_dump_mtx); } } @@ -1145,6 +1146,7 @@ prof_postfork_parent(tsdn_t *tsdn) { malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); counter_postfork_parent(tsdn, &prof_idump_accumulated); + malloc_mutex_postfork_parent(tsdn, &prof_recent_dump_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } @@ -1170,6 +1172,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); counter_postfork_child(tsdn, &prof_idump_accumulated); + malloc_mutex_postfork_child(tsdn, &prof_recent_dump_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } diff --git a/src/prof_recent.c b/src/prof_recent.c index d0a83aa..949ae76 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -18,6 +18,8 @@ static #endif prof_recent_list_t prof_recent_alloc_list; +malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. 
*/ + static void prof_recent_alloc_max_init() { atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, @@ -433,6 +435,7 @@ prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { static void prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_dump_mtx); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); while (!ql_empty(to_delete)) { prof_recent_t *node = ql_first(to_delete); @@ -507,6 +510,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { #define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx); buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); @@ -543,8 +547,10 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { prof_recent_alloc_restore_locked(tsd, &temp_list); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_alloc_async_cleanup(tsd, &temp_list); buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_dump_mtx); + + prof_recent_alloc_async_cleanup(tsd, &temp_list); } #undef PROF_RECENT_PRINT_BUFSIZE @@ -552,9 +558,13 @@ bool prof_recent_init() { prof_recent_alloc_max_init(); - if (malloc_mutex_init(&prof_recent_alloc_mtx, - "prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC, - malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", + WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { + return true; + } + + if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump", + WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { return true; } -- cgit v0.12 From 17a64fe91c4b424d10c96c94051d562390471810 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 11:56:36 -0700 Subject: Add a small program to print data structure sizes. --- Makefile.in | 3 ++- test/stress/sizes.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 test/stress/sizes.c diff --git a/Makefile.in b/Makefile.in index e7666fb..cd927cf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -291,7 +291,8 @@ endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c + $(srcroot)test/stress/hookbench.c \ + $(srcroot)test/stress/sizes.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) diff --git a/test/stress/sizes.c b/test/stress/sizes.c new file mode 100644 index 0000000..7360494 --- /dev/null +++ b/test/stress/sizes.c @@ -0,0 +1,50 @@ +#include "test/jemalloc_test.h" + +#include + +/* + * Print the sizes of various important core data structures. OK, I guess this + * isn't really a "stress" test, but it does give useful information about + * low-level performance characteristics, as the other things in this directory + * do. 
+ */ + +static void +do_print(const char *name, size_t sz_bytes) { + const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", + "ZB"}; + size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); + + size_t ind = 0; + double sz = sz_bytes; + while (sz >= 1024 && ind < sizes_max) { + sz /= 1024; + ind++; + } + if (ind == 0) { + printf("%-20s: %zu bytes\n", name, sz_bytes); + } else { + printf("%-20s: %f %s\n", name, sz, sizes[ind]); + } +} + +int +main() { +#define P(type) \ + do_print(#type, sizeof(type)) + P(arena_t); + P(arena_stats_t); + P(base_t); + P(decay_t); + P(edata_t); + P(ecache_t); + P(eset_t); + P(malloc_mutex_t); + P(prof_tctx_t); + P(prof_gctx_t); + P(prof_tdata_t); + P(tcache_t); + P(tcache_slow_t); + P(tsd_t); +#undef P +} -- cgit v0.12 From fe7108305a449df3d28f68e6bd9ff74dea68946b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 14:30:28 -0700 Subject: Add peak_t, for tracking allocator net max. --- Makefile.in | 1 + include/jemalloc/internal/peak.h | 37 +++++++++++++++++++++++++++++++ test/unit/peak.c | 47 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 include/jemalloc/internal/peak.h create mode 100644 test/unit/peak.c diff --git a/Makefile.in b/Makefile.in index cd927cf..b211f88 100644 --- a/Makefile.in +++ b/Makefile.in @@ -222,6 +222,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ + $(srcroot)test/unit/peak.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h new file mode 100644 index 0000000..59da3e4 --- /dev/null +++ b/include/jemalloc/internal/peak.h @@ -0,0 +1,37 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_H +#define JEMALLOC_INTERNAL_PEAK_H + +typedef struct peak_s peak_t; +struct peak_s { + /* The highest recorded peak value, after adjustment (see below). */ + uint64_t cur_max; + /* + * The difference between alloc and dalloc at the last set_zero call; + * this lets us cancel out the appropriate amount of excess. + */ + uint64_t adjustment; +}; + +#define PEAK_INITIALIZER {0, 0} + +static inline uint64_t +peak_max(peak_t *peak) { + return peak->cur_max; +} + +static inline void +peak_update(peak_t *peak, uint64_t alloc, uint64_t dalloc) { + int64_t candidate_max = (int64_t)(alloc - dalloc - peak->adjustment); + if (candidate_max > (int64_t)peak->cur_max) { + peak->cur_max = candidate_max; + } +} + +/* Resets the counter to zero; all peaks are now relative to this point. 
*/ +static inline void +peak_set_zero(peak_t *peak, uint64_t alloc, uint64_t dalloc) { + peak->cur_max = 0; + peak->adjustment = alloc - dalloc; +} + +#endif /* JEMALLOC_INTERNAL_PEAK_H */ diff --git a/test/unit/peak.c b/test/unit/peak.c new file mode 100644 index 0000000..1112978 --- /dev/null +++ b/test/unit/peak.c @@ -0,0 +1,47 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/peak.h" + +TEST_BEGIN(test_peak) { + peak_t peak = PEAK_INITIALIZER; + expect_u64_eq(0, peak_max(&peak), + "Peak should be zero at initialization"); + peak_update(&peak, 100, 50); + expect_u64_eq(50, peak_max(&peak), + "Missed update"); + peak_update(&peak, 100, 100); + expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); + peak_update(&peak, 100, 200); + expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); + peak_update(&peak, 200, 200); + expect_u64_eq(50, peak_max(&peak), "Haven't reached peak again"); + peak_update(&peak, 300, 200); + expect_u64_eq(100, peak_max(&peak), "Missed an update."); + peak_set_zero(&peak, 300, 200); + expect_u64_eq(0, peak_max(&peak), "No effect from zeroing"); + peak_update(&peak, 300, 300); + expect_u64_eq(0, peak_max(&peak), "Dalloc shouldn't change peak"); + peak_update(&peak, 400, 300); + expect_u64_eq(0, peak_max(&peak), "Should still be net negative"); + peak_update(&peak, 500, 300); + expect_u64_eq(100, peak_max(&peak), "Missed an update."); + /* + * Above, we set to zero while a net allocator; let's try as a + * net-deallocator. + */ + peak_set_zero(&peak, 600, 700); + expect_u64_eq(0, peak_max(&peak), "No effect from zeroing."); + peak_update(&peak, 600, 800); + expect_u64_eq(0, peak_max(&peak), "Dalloc shouldn't change peak."); + peak_update(&peak, 700, 800); + expect_u64_eq(0, peak_max(&peak), "Should still be net negative."); + peak_update(&peak, 800, 800); + expect_u64_eq(100, peak_max(&peak), "Missed an update."); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_peak); +} -- cgit v0.12 From d82a164d0ddb5418de3b6a07dd302edddc347129 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 14:31:00 -0700 Subject: Add thread.peak.[read|reset] mallctls. These can be used to track net allocator activity on a per-thread basis. --- Makefile.in | 1 + doc/jemalloc.xml.in | 36 +++++++++++ include/jemalloc/internal/peak_event.h | 24 ++++++++ include/jemalloc/internal/thread_event.h | 10 ++-- include/jemalloc/internal/tsd.h | 7 +++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 41 +++++++++++++ src/peak_event.c | 67 +++++++++++++++++++++ src/thread_event.c | 10 ++++ test/unit/mallctl.c | 70 +++++++++++++++++++++- 13 files changed, 269 insertions(+), 5 deletions(-) create mode 100644 include/jemalloc/internal/peak_event.h create mode 100644 src/peak_event.c diff --git a/Makefile.in b/Makefile.in index b211f88..2f3fea1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -129,6 +129,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ + $(srcroot)src/peak_event.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1baf1f6..5ab8456 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1621,6 +1621,42 @@ malloc_conf = "xmalloc:true";]]> should not be modified by the application. 
+ + + thread.peak.read + (uint64_t) + r- + [] + + Get an approximation of the maximum value of the + difference between the number of bytes allocated and the number of bytes + deallocated by the calling thread since the last call to thread.peak.reset, + or since the thread's creation if it has not called thread.peak.reset. + No guarantees are made about the quality of the approximation, but + jemalloc currently endeavors to maintain accuracy to within one hundred + kilobytes. + + + + + + thread.peak.reset + (void) + -- + [] + + Resets the counter for net bytes allocated in the calling + thread to zero. This affects subsequent calls to thread.peak.read, + but not the values returned by thread.allocated + or thread.deallocated. + + + thread.tcache.enabled diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h new file mode 100644 index 0000000..b808ce0 --- /dev/null +++ b/include/jemalloc/internal/peak_event.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H +#define JEMALLOC_INTERNAL_PEAK_EVENT_H + +/* + * While peak.h contains the simple helper struct that tracks state, this + * contains the allocator tie-ins (and knows about tsd, the event module, etc.). + */ + +/* Update the peak with current tsd state. */ +void peak_event_update(tsd_t *tsd); +/* Set current state to zero. */ +void peak_event_zero(tsd_t *tsd); +uint64_t peak_event_max(tsd_t *tsd); + +/* Manual hooks. */ +/* The activity-triggered hooks. */ +uint64_t peak_alloc_new_event_wait(tsd_t *tsd); +uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd); +void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed); +uint64_t peak_dalloc_new_event_wait(tsd_t *tsd); +uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd); +void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); + +#endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 2fcaa88..bca8a44 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -53,10 +53,12 @@ void tsd_te_init(tsd_t *tsd); * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ - E(prof_sample, (config_prof && opt_prof), true) \ - E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) + E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ + E(prof_sample, (config_prof && opt_prof), true) \ + E(stats_interval, (opt_stats_interval >= 0), true) \ + E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ + E(peak_alloc, config_stats, true) \ + E(peak_dalloc, config_stats, false) #define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 18bdb8f..9408b2c 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/peak.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rtree_tsd.h" @@ -69,6 +70,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(prof_sample_last_event, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ O(stats_interval_last_event, uint64_t, uint64_t) \ + O(peak_alloc_event_wait, uint64_t, uint64_t) 
\ + O(peak_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -77,6 +80,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -95,6 +99,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* prof_sample_last_event */ 0, \ /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ + /* peak_alloc_event_wait */ 0, \ + /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* iarena */ NULL, \ @@ -103,6 +109,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9f81e21..d50fa88 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -70,6 +70,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 15fe7f0..94db8c0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b5fccae..337dcfe 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -70,6 +70,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 15fe7f0..94db8c0 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index be8be10..0bd38fe 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/inspect.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -61,6 +62,8 @@ CTL_PROTO(background_thread) CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_peak_read) +CTL_PROTO(thread_peak_reset) CTL_PROTO(thread_prof_name) CTL_PROTO(thread_prof_active) CTL_PROTO(thread_arena) @@ -294,6 +297,11 @@ static const ctl_named_node_t thread_tcache_node[] = { {NAME("flush"), CTL(thread_tcache_flush)} }; +static const ctl_named_node_t thread_peak_node[] = { + {NAME("read"), CTL(thread_peak_read)}, + {NAME("reset"), CTL(thread_peak_reset)}, +}; + static const ctl_named_node_t thread_prof_node[] = { {NAME("name"), CTL(thread_prof_name)}, {NAME("active"), CTL(thread_prof_active)} @@ -306,6 +314,7 @@ static const ctl_named_node_t thread_node[] = { {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("peak"), CHILD(named, thread_peak)}, {NAME("prof"), CHILD(named, 
thread_prof)}, {NAME("idle"), CTL(thread_idle)} }; @@ -1954,6 +1963,38 @@ label_return: } static int +thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + if (!config_stats) { + return ENOENT; + } + READONLY(); + peak_event_update(tsd); + uint64_t result = peak_event_max(tsd); + READ(result, uint64_t); + ret = 0; +label_return: + return ret; +} + +static int +thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + if (!config_stats) { + return ENOENT; + } + NEITHER_READ_NOR_WRITE(); + peak_event_zero(tsd); + ret = 0; +label_return: + return ret; +} + +static int thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { diff --git a/src/peak_event.c b/src/peak_event.c new file mode 100644 index 0000000..ffb061b --- /dev/null +++ b/src/peak_event.c @@ -0,0 +1,67 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/peak.h" +#include "jemalloc/internal/peak_event.h" + +/* + * Update every 100k by default. We're not exposing this as a configuration + * option for now; we don't want to bind ourselves too tightly to any particular + * performance requirements for small values, or guarantee that we'll even be + * able to provide fine-grained accuracy. + */ +#define PEAK_EVENT_WAIT (100 * 1024) + +/* Update the peak with current tsd state. */ +void +peak_event_update(tsd_t *tsd) { + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + peak_update(peak, alloc, dalloc); +} + +/* Set current state to zero. */ +void +peak_event_zero(tsd_t *tsd) { + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + peak_set_zero(peak, alloc, dalloc); +} + +uint64_t +peak_event_max(tsd_t *tsd) { + peak_t *peak = tsd_peakp_get(tsd); + return peak_max(peak); +} + +uint64_t +peak_alloc_new_event_wait(tsd_t *tsd) { + return PEAK_EVENT_WAIT; +} + +uint64_t +peak_alloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + peak_event_update(tsd); +} + +uint64_t +peak_dalloc_new_event_wait(tsd_t *tsd) { + return PEAK_EVENT_WAIT; +} + +uint64_t +peak_dalloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + peak_event_update(tsd); +} diff --git a/src/thread_event.c b/src/thread_event.c index 40c0487..99a188d 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -60,6 +60,16 @@ stats_interval_fetch_elapsed(tsd_t *tsd) { return last_event - last_stats_event; } +static uint64_t +peak_alloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + +static uint64_t +peak_dalloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + /* Per event facilities done. 
*/ static bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cc1d531..10d809f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -955,6 +955,73 @@ TEST_BEGIN(test_thread_idle) { } TEST_END +TEST_BEGIN(test_thread_peak) { + test_skip_if(!config_stats); + + /* + * We don't commit to any stable amount of accuracy for peak tracking + * (in practice, when this test was written, we made sure to be within + * 100k). But 10MB is big for more or less any definition of big. + */ + size_t big_size = 10 * 1024 * 1024; + size_t small_size = 256; + + void *ptr; + int err; + size_t sz; + uint64_t peak; + sz = sizeof(uint64_t); + + err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, ""); + ptr = mallocx(SC_SMALL_MAXCLASS, 0); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Missed an update"); + free(ptr); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Freeing changed peak"); + ptr = mallocx(big_size, 0); + free(ptr); + /* + * The peak should have hit big_size in the last two lines, even though + * the net allocated bytes has since dropped back down to zero. We + * should have noticed the peak change without having down any mallctl + * calls while net allocated bytes was high. + */ + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_ge(peak, big_size, "Missed a peak change."); + + /* Allocate big_size, but using small allocations. */ + size_t nallocs = big_size / small_size; + void **ptrs = calloc(nallocs, sizeof(void *)); + err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, ""); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(0, peak, "Missed a reset."); + for (size_t i = 0; i < nallocs; i++) { + ptrs[i] = mallocx(small_size, 0); + } + for (size_t i = 0; i < nallocs; i++) { + free(ptrs[i]); + } + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + /* + * We don't guarantee exactness; make sure we're within 10% of the peak, + * though. 
+ */ + expect_u64_ge(peak, nallocx(small_size, 0) * nallocs * 9 / 10, + "Missed some peak changes."); + expect_u64_le(peak, nallocx(small_size, 0) * nallocs * 11 / 10, + "Overcounted peak changes."); + free(ptrs); +} +TEST_END + int main(void) { return test( @@ -987,5 +1054,6 @@ main(void) { test_stats_arenas, test_hooks, test_hooks_exhaustion, - test_thread_idle); + test_thread_idle, + test_thread_peak); } -- cgit v0.12 From 4aea7432795414a72034ef35959078c64c69078e Mon Sep 17 00:00:00 2001 From: Jon Haslam Date: Tue, 2 Jun 2020 06:42:44 -0700 Subject: High Resolution Timestamps for Profiling --- configure.ac | 12 +++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 ++ .../jemalloc/internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/jemalloc_preamble.h.in | 7 +++ include/jemalloc/internal/nstime.h | 17 ++++++- src/ctl.c | 6 ++- src/jemalloc.c | 21 +++++++++ src/large.c | 2 +- src/nstime.c | 54 +++++++++++++++++++--- src/prof_log.c | 7 ++- src/prof_recent.c | 2 +- test/unit/arena_decay.c | 3 +- test/unit/nstime.c | 25 +--------- 13 files changed, 123 insertions(+), 39 deletions(-) diff --git a/configure.ac b/configure.ac index 787ef1b..d9fdebd 100644 --- a/configure.ac +++ b/configure.ac @@ -1776,6 +1776,18 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi +dnl check for CLOCK_REALTIME (always should be available on Linux) +JE_COMPILABLE([clock_gettime(CLOCK_REALTIME, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); +], [je_cv_clock_realtime]) +if test "x${je_cv_clock_realtime}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME]) +fi + dnl Use syscall(2) (if available) by default. AC_ARG_ENABLE([syscall], [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c442a21..83e733e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -101,6 +101,11 @@ #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME /* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_REALTIME + +/* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc * bootstrapping will cause recursion into the pthreads library. Therefore, if diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 338a590..3dea1e2 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/nstime.h" /* TSD checks this to set thread local slow state accordingly. 
*/ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 3418cbf..66302ab 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -209,5 +209,12 @@ static const bool have_background_thread = false #endif ; +static const bool config_high_res_timer = +#ifdef JEMALLOC_HAVE_CLOCK_REALTIME + true +#else + false +#endif + ; #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index c4bee24..76e4351 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -30,10 +30,23 @@ uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); +typedef void (nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; -bool nstime_init_update(nstime_t *time); +typedef void (nstime_prof_update_t)(nstime_t *); +extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update; + +void nstime_init_update(nstime_t *time); +void nstime_prof_init_update(nstime_t *time); + +enum prof_time_res_e { + prof_time_res_default = 0, + prof_time_res_high = 1 +}; +typedef enum prof_time_res_e prof_time_res_t; + +extern prof_time_res_t opt_prof_time_res; +extern const char *prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { diff --git a/src/ctl.c b/src/ctl.c index 0bd38fe..24d9eb3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -129,6 +129,7 @@ CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_experimental_use_sys_thread_name) +CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -385,7 +386,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_experimental_use_sys_thread_name"), CTL(opt_prof_experimental_use_sys_thread_name)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)} + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)} }; static const ctl_named_node_t tcache_node[] = { @@ -1853,6 +1855,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, opt_prof_experimental_use_sys_thread_name, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, + prof_time_res_mode_names[opt_prof_time_res], const char *) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index 74355d4..bb1b38c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,6 +14,7 @@ #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" @@ -1497,6 +1498,26 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL( opt_prof_experimental_use_sys_thread_name, "prof_experimental_use_sys_thread_name") + if (CONF_MATCH("prof_time_resolution")) { + if (CONF_MATCH_VALUE("default")) { + opt_prof_time_res = + prof_time_res_default; + } else if 
(CONF_MATCH_VALUE("high")) { + if (!config_high_res_timer) { + CONF_ERROR( + "No high resolution" + " timer support", + k, klen, v, vlen); + } else { + opt_prof_time_res = + prof_time_res_high; + } + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + } + CONF_CONTINUE; } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/large.c b/src/large.c index b843937..cc3e727 100644 --- a/src/large.c +++ b/src/large.c @@ -305,7 +305,7 @@ large_prof_tctx_reset(edata_t *edata) { void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { nstime_t t; - nstime_init_update(&t); + nstime_prof_init_update(&t); edata_prof_alloc_time_set(edata, &t); edata_prof_recent_alloc_init(edata); large_prof_tctx_set(edata, tctx); diff --git a/src/nstime.c b/src/nstime.c index eb8f6c0..184aa4c 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -152,25 +152,65 @@ nstime_monotonic_impl(void) { } nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; -static bool +prof_time_res_t opt_prof_time_res = + prof_time_res_default; + +const char *prof_time_res_mode_names[] = { + "default", + "high", +}; + + +static void +nstime_get_realtime(nstime_t *time) { +#if defined(JEMALLOC_HAVE_CLOCK_REALTIME) && !defined(_WIN32) + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +#else + unreachable(); +#endif +} + +static void +nstime_prof_update_impl(nstime_t *time) { + nstime_t old_time; + + nstime_copy(&old_time, time); + + if (opt_prof_time_res == prof_time_res_high) { + nstime_get_realtime(time); + } else { + nstime_get(time); + } +} +nstime_prof_update_t *JET_MUTABLE nstime_prof_update = nstime_prof_update_impl; + +static void nstime_update_impl(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); - nstime_get(time); + nstime_get(time); /* Handle non-monotonic clocks. 
*/ if (unlikely(nstime_compare(&old_time, time) > 0)) { nstime_copy(time, &old_time); - return true; } - - return false; } nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl; -bool +void nstime_init_update(nstime_t *time) { nstime_init_zero(time); - return nstime_update(time); + nstime_update(time); } + +void +nstime_prof_init_update(nstime_t *time) { + nstime_init_zero(time); + nstime_prof_update(time); +} + + diff --git a/src/prof_log.c b/src/prof_log.c index 1635979..7fea854 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -235,7 +235,7 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { nstime_t alloc_time = prof_info->alloc_time; nstime_t free_time; - nstime_init_update(&free_time); + nstime_prof_init_update(&free_time); size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) @@ -572,6 +572,11 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); + const char *res_type = + prof_time_res_mode_names[opt_prof_time_res]; + emitter_json_kv(emitter, "prof_time_resolution", + emitter_type_string, &res_type); + int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); diff --git a/src/prof_recent.c b/src/prof_recent.c index 949ae76..270691a 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -219,7 +219,7 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { assert(nstime_equals_zero(&recent->dalloc_time)); assert(recent->dalloc_tctx == NULL); if (dalloc_tctx != NULL) { - nstime_update(&recent->dalloc_time); + nstime_prof_update(&recent->dalloc_time); recent->dalloc_tctx = dalloc_tctx; dalloc_tctx = NULL; } diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 7ed270f..86f7057 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -26,13 +26,12 @@ nstime_monotonic_mock(void) { return monotonic_mock; } -static bool +static void nstime_update_mock(nstime_t *time) { nupdates_mock++; if (monotonic_mock) { nstime_copy(time, &time_mock); } - return !monotonic_mock; } static unsigned diff --git a/test/unit/nstime.c b/test/unit/nstime.c index bf87501..083002b 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -206,28 +206,6 @@ TEST_BEGIN(test_nstime_monotonic) { } TEST_END -TEST_BEGIN(test_nstime_update) { - nstime_t nst; - - expect_false(nstime_init_update(&nst), "Basic time update failed."); - - /* Only Rip Van Winkle sleeps this long. */ - { - nstime_t addend; - nstime_init2(&addend, 631152000, 0); - nstime_add(&nst, &addend); - } - { - nstime_t nst0; - nstime_copy(&nst0, &nst); - expect_true(nstime_update(&nst), - "Update should detect time roll-back."); - expect_d_eq(nstime_compare(&nst, &nst0), 0, - "Time should not have been modified"); - } -} -TEST_END - int main(void) { return test( @@ -242,6 +220,5 @@ main(void) { test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, - test_nstime_monotonic, - test_nstime_update); + test_nstime_monotonic); } -- cgit v0.12 From 40672b0b78207f3b624bd20772b24865d208f215 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 20:12:15 -0700 Subject: Remove duplicate logging in malloc. 
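
je_malloc() already emits the "core.malloc.entry" log record before
tail-calling malloc_default() on its slow path, so keeping the entry
hook in malloc_default() produced two entry records per slow-path call;
only the exit hook belongs there. A minimal compilable sketch of that
intended split, with hypothetical _sketch names and a printf stand-in
for the real LOG() plumbing (illustrative only, not the actual jemalloc
code):

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for jemalloc's LOG() macro. */
    #define LOG(name, fmt, ...) \
        fprintf(stderr, name ": " fmt "\n", __VA_ARGS__)

    /* Slow path: emits the exit record only; entry was already logged. */
    static void *
    malloc_default_sketch(size_t size) {
        void *ret = malloc(size);
        LOG("core.malloc.exit", "result: %p", ret);
        return ret;
    }

    void *
    je_malloc_sketch(size_t size) {
        LOG("core.malloc.entry", "size: %zu", size);
        /* Fast path elided; fall back to the slow path via tail call. */
        return malloc_default_sketch(size);
    }
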
--- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index bb1b38c..f18fa61 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2437,7 +2437,11 @@ malloc_default(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.malloc.entry", "size: %zu", size); + /* + * This variant has logging hook on exit but not on entry. It's callled + * only by je_malloc, below, which emits the entry one for us (and, if + * it calls us, does so only via tail call). + */ static_opts_init(&sopts); dynamic_opts_init(&dopts); -- cgit v0.12 From dcfa6fd507d29e4d686abb5263a195c22d187ca0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 09:37:39 -0700 Subject: stress/sizes: Add a couple more types. --- test/stress/sizes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/stress/sizes.c b/test/stress/sizes.c index 7360494..1bdfe16 100644 --- a/test/stress/sizes.c +++ b/test/stress/sizes.c @@ -43,6 +43,9 @@ main() { P(prof_tctx_t); P(prof_gctx_t); P(prof_tdata_t); + P(rtree_t); + P(rtree_leaf_elm_t); + P(slab_data_t); P(tcache_t); P(tcache_slow_t); P(tsd_t); -- cgit v0.12 From 7e09a57b395dc88af218873fd7f47c99c0542f4f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 09:39:46 -0700 Subject: stress/sizes: Fix an off-by-one issue. Algorithmically, a size greater than 1024 ZB could access one-past-the-end of the sizes array. This couldn't really happen since SIZE_MAX is less than 1024 ZB on all platforms we support (and we pick the arguments to this function to be reasonable anyways), but it's not like there's any reason *not* to fix it, either. --- test/stress/sizes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/stress/sizes.c b/test/stress/sizes.c index 1bdfe16..44c9de5 100644 --- a/test/stress/sizes.c +++ b/test/stress/sizes.c @@ -17,7 +17,7 @@ do_print(const char *name, size_t sz_bytes) { size_t ind = 0; double sz = sz_bytes; - while (sz >= 1024 && ind < sizes_max) { + while (sz >= 1024 && ind < sizes_max - 1) { sz /= 1024; ind++; } -- cgit v0.12 From 40fa6674a99a1bac85a4cb0f5cf10ce0e4878a5e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 15:20:51 -0700 Subject: Fix prof timestamp conf reading --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index f18fa61..0d84a01 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1516,8 +1516,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_ERROR("Invalid conf value", k, klen, v, vlen); } + CONF_CONTINUE; } - CONF_CONTINUE; } if (config_log) { if (CONF_MATCH("log")) { -- cgit v0.12 From b7858abfc0c605c451027c5f0209680b25ec8891 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 09:57:54 -0700 Subject: Expose prof testing internal functions --- include/jemalloc/internal/prof_externs.h | 27 ++++++++++++--------------- include/jemalloc/internal/prof_log.h | 4 ++-- include/jemalloc/internal/prof_recent.h | 8 ++++---- src/prof.c | 4 ---- src/prof_data.c | 8 ++++---- src/prof_log.c | 19 +++++++------------ src/prof_recent.c | 24 ++++++++++++------------ test/unit/prof_recent.c | 10 +++++----- 8 files changed, 46 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 3d9fcfb..b433965 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -57,21 +57,6 @@ void 
prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -#ifdef JEMALLOC_JET -typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); -extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -#endif -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *JET_MUTABLE prof_dump_open; - -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; -#ifdef JEMALLOC_JET -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); -#endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); void prof_idump(tsdn_t *tsdn); @@ -104,6 +89,18 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); +/* Used by unit tests. */ +typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); +extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *JET_MUTABLE prof_dump_open; +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); + bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index c8cc5a3..928bf27 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -7,13 +7,13 @@ extern malloc_mutex_t log_mtx; void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_init(tsd_t *tsdn); -#ifdef JEMALLOC_JET + +/* Used in unit tests. */ size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); size_t prof_log_thr_count(void); bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); -#endif #endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index f97273c..f88413d 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -8,11 +8,11 @@ void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); -#ifdef JEMALLOC_JET + +/* Used in unit tests. 
*/ typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; -edata_t *prof_recent_alloc_edata_get_no_lock(const prof_recent_t *node); -prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); -#endif +edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); +prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); #endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 38a3db2..db895f8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -205,12 +205,8 @@ prof_read_sys_thread_name_impl(char *buf, size_t limit) { return ENOSYS; #endif } -#ifdef JEMALLOC_JET prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = prof_read_sys_thread_name_impl; -#else -#define prof_read_sys_thread_name prof_read_sys_thread_name_impl -#endif static void prof_fetch_sys_thread_name(tsd_t *tsd) { diff --git a/src/prof_data.c b/src/prof_data.c index 66ed36a..fe9ef15 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -421,7 +421,7 @@ prof_tctx_create(tsd_t *tsd) { return prof_lookup(tsd, &bt); } -#ifdef JEMALLOC_JET +/* Used in unit tests. */ static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { @@ -432,6 +432,7 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } +/* Used in unit tests. */ size_t prof_tdata_count(void) { size_t tdata_count = 0; @@ -446,6 +447,7 @@ prof_tdata_count(void) { return tdata_count; } +/* Used in unit tests. */ size_t prof_bt_count(void) { size_t bt_count; @@ -464,7 +466,6 @@ prof_bt_count(void) { return bt_count; } -#endif static int prof_dump_open_impl(bool propagate_err, const char *filename) { @@ -1174,7 +1175,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, return false; } -#ifdef JEMALLOC_JET +/* Used in unit tests. */ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { @@ -1219,7 +1220,6 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; } } -#endif void prof_bt_hash(const void *key, size_t r_hash[2]) { diff --git a/src/prof_log.c b/src/prof_log.c index 7fea854..00c7659 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -27,9 +27,8 @@ enum prof_logging_state_e { */ prof_logging_state_t prof_logging_state = prof_logging_state_stopped; -#ifdef JEMALLOC_JET +/* Used in unit tests. */ static bool prof_log_dummy = false; -#endif /* Incremented for every log file that is output. */ static uint64_t log_seq = 0; @@ -305,7 +304,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { return thr_node1->thr_uid == thr_node2->thr_uid; } -#ifdef JEMALLOC_JET +/* Used in unit tests. */ size_t prof_log_bt_count(void) { size_t cnt = 0; @@ -317,6 +316,7 @@ prof_log_bt_count(void) { return cnt; } +/* Used in unit tests. */ size_t prof_log_alloc_count(void) { size_t cnt = 0; @@ -328,6 +328,7 @@ prof_log_alloc_count(void) { return cnt; } +/* Used in unit tests. */ size_t prof_log_thr_count(void) { size_t cnt = 0; @@ -339,11 +340,13 @@ prof_log_thr_count(void) { return cnt; } +/* Used in unit tests. */ bool prof_log_is_logging(void) { return prof_logging_state == prof_logging_state_started; } +/* Used in unit tests. */ bool prof_log_rep_check(void) { if (prof_logging_state == prof_logging_state_stopped @@ -395,11 +398,11 @@ prof_log_rep_check(void) { return false; } +/* Used in unit tests. 
*/ void prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } -#endif bool prof_log_start(tsdn_t *tsdn, const char *filename) { @@ -451,11 +454,9 @@ prof_emitter_write_cb(void *opaque, const char *to_write) { struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *)opaque; size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET if (prof_log_dummy) { return; } -#endif arg->ret = malloc_write_fd(arg->fd, to_write, bytes); } @@ -612,15 +613,11 @@ prof_log_stop(tsdn_t *tsdn) { /* Create a file. */ int fd; -#ifdef JEMALLOC_JET if (prof_log_dummy) { fd = 0; } else { fd = creat(log_filename, 0644); } -#else - fd = creat(log_filename, 0644); -#endif if (fd == -1) { malloc_printf(": creat() for log file \"%s\" " @@ -668,11 +665,9 @@ prof_log_stop(tsdn_t *tsdn) { prof_logging_state = prof_logging_state_stopped; malloc_mutex_unlock(tsdn, &log_mtx); -#ifdef JEMALLOC_JET if (prof_log_dummy) { return false; } -#endif return close(fd) || arg.ret == -1; } #undef PROF_LOG_STOP_BUFSIZE diff --git a/src/prof_recent.c b/src/prof_recent.c index 270691a..9af753f 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -12,10 +12,6 @@ ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -#ifndef JEMALLOC_JET -typedef ql_head(prof_recent_t) prof_recent_list_t; -static -#endif prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */ @@ -104,14 +100,16 @@ decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { prof_tctx_try_destroy(tsd, tctx); } -#ifndef JEMALLOC_JET -static inline -#endif -edata_t * +static inline edata_t * prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE); } +edata_t * +prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) { + return prof_recent_alloc_edata_get_no_lock(n); +} + static inline edata_t * prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -129,14 +127,16 @@ edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } -#ifndef JEMALLOC_JET -static inline -#endif -prof_recent_t * +static inline prof_recent_t * edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { return edata_prof_recent_alloc_get_dont_call_directly(edata); } +prof_recent_t * +edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { + return edata_prof_recent_alloc_get_no_lock(edata); +} + static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 791cc4f..1885a1a 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -103,11 +103,11 @@ confirm_malloc(void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &arena_emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); - prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); + prof_recent_t *n = edata_prof_recent_alloc_get_no_lock_test(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock(n), + 
expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock_test(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -122,10 +122,10 @@ static void confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - edata_t *edata = prof_recent_alloc_edata_get_no_lock(n); + edata_t *edata = prof_recent_alloc_edata_get_no_lock_test(n); assert_ptr_not_null(edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock_test(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -134,7 +134,7 @@ static void confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_null(prof_recent_alloc_edata_get_no_lock(n), + expect_ptr_null(prof_recent_alloc_edata_get_no_lock_test(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); -- cgit v0.12 From 2a84f9b8fcf2ff8d87f0f3246b4b6d897520b240 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 11:16:07 -0700 Subject: Unify alignment flag reading and computation --- src/jemalloc.c | 98 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0d84a01..3d5d744 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2074,6 +2074,37 @@ dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->arena_ind = ARENA_IND_AUTOMATIC; } +/* + * ind parameter is optional and is only checked and filled if alignment == 0; + * return true if result is out of range. + */ +JEMALLOC_ALWAYS_INLINE bool +aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, + bool bump_empty_aligned_alloc) { + assert(usize != NULL); + if (alignment == 0) { + if (ind != NULL) { + *ind = sz_size2index(size); + if (unlikely(*ind >= SC_NSIZES)) { + return true; + } + *usize = sz_index2size(*ind); + assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); + return false; + } + *usize = sz_s2u(size); + } else { + if (bump_empty_aligned_alloc && unlikely(size == 0)) { + size = 1; + } + *usize = sz_sa2u(size, alignment); + } + if (unlikely(*usize == 0 || *usize > SC_LARGE_MAXCLASS)) { + return true; + } + return false; +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, @@ -2227,26 +2258,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (config_fill && sopts->slow && opt_zero) { dopts->zero = true; } - if (dopts->alignment == 0) { - ind = sz_size2index(size); - if (unlikely(ind >= SC_NSIZES)) { - goto label_oom; - } - usize = sz_index2size(ind); - assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); - dopts->usize = usize; - } else { - if (sopts->bump_empty_aligned_alloc) { - if (unlikely(size == 0)) { - size = 1; - } - } - usize = sz_sa2u(size, dopts->alignment); - dopts->usize = usize; - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - goto label_oom; - } + if (aligned_usize_get(size, dopts->alignment, &usize, &ind, + sopts->bump_empty_aligned_alloc)) { + goto label_oom; } + dopts->usize = usize; /* Validate the user input. 
*/ if (sopts->assert_nonempty_alloc) { assert (size != 0); @@ -3109,9 +3125,7 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.num_items = 1; dopts.item_size = size; if (unlikely(flags != 0)) { - if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { - dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - } + dopts.alignment = MALLOCX_ALIGN_GET(flags); dopts.zero = MALLOCX_ZERO_GET(flags); @@ -3162,9 +3176,7 @@ je_mallocx(size_t size, int flags) { dopts.num_items = 1; dopts.item_size = size; if (unlikely(flags != 0)) { - if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { - dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - } + dopts.alignment = MALLOCX_ALIGN_GET(flags); dopts.zero = MALLOCX_ZERO_GET(flags); @@ -3316,9 +3328,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { - usize = (alignment == 0) ? - sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -3501,22 +3511,14 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_realloc() will use the actual usize to decide whether to sample. */ size_t usize_max; - if (alignment == 0) { - usize_max = sz_s2u(size+extra); - assert(usize_max > 0 - && usize_max <= SC_LARGE_MAXCLASS); - } else { - usize_max = sz_sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 - || usize_max > SC_LARGE_MAXCLASS)) { - /* - * usize_max is out of range, and chances are that - * allocation will fail, but use the maximum possible - * value and carry on with prof_alloc_prep(), just in - * case allocation succeeds. - */ - usize_max = SC_LARGE_MAXCLASS; - } + if (aligned_usize_get(size + extra, alignment, &usize_max, NULL, + false)) { + /* + * usize_max is out of range, and chances are that allocation + * will fail, but use the maximum possible value and carry on + * with prof_alloc_prep(), just in case allocation succeeds. + */ + usize_max = SC_LARGE_MAXCLASS; } bool prof_active = prof_active_get_unlocked(); bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); @@ -3726,13 +3728,9 @@ je_dallocx(void *ptr, int flags) { JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { check_entry_exit_locking(tsdn); - size_t usize; - if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { - usize = sz_s2u(size); - } else { - usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - } + /* In case of out of range, let the user see it rather than fail. 
*/ + aligned_usize_get(size, MALLOCX_ALIGN_GET(flags), &usize, NULL, false); check_entry_exit_locking(tsdn); return usize; } -- cgit v0.12 From 4b0c008489020bd9d66c21e1452fe8324d11b3f0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 11:19:09 -0700 Subject: Unify zero flag reading and setting --- src/jemalloc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3d5d744..afd4890 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2105,6 +2105,15 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, return false; } +JEMALLOC_ALWAYS_INLINE bool +zero_get(bool guarantee, bool slow) { + if (config_fill && slow && unlikely(opt_zero)) { + return true; + } else { + return guarantee; + } +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, @@ -2255,9 +2264,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* This is the beginning of the "core" algorithm. */ - if (config_fill && sopts->slow && opt_zero) { - dopts->zero = true; - } + dopts->zero = zero_get(dopts->zero, sopts->slow); if (aligned_usize_get(size, dopts->alignment, &usize, &ind, sopts->bump_empty_aligned_alloc)) { goto label_oom; @@ -3293,10 +3300,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); - bool zero = flags & MALLOCX_ZERO; - if (config_fill && unlikely(opt_zero)) { - zero = true; - } + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -3562,11 +3566,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - - bool zero = flags & MALLOCX_ZERO; - if (config_fill && unlikely(opt_zero)) { - zero = true; - } + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); -- cgit v0.12 From 95a59d2f72f4799b1d7aa07216c558408a91917a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:06:43 -0700 Subject: Unify tcache flag reading and selection --- src/jemalloc.c | 177 ++++++++++++++++++++++----------------------------------- 1 file changed, 69 insertions(+), 108 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index afd4890..aacec7b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2114,27 +2114,45 @@ zero_get(bool guarantee, bool slow) { } } -/* ind is ignored if dopts->alignment > 0. */ -JEMALLOC_ALWAYS_INLINE void * -imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, - size_t size, size_t usize, szind_t ind) { +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { tcache_t *tcache; - arena_t *arena; - - /* Fill in the tcache. */ - if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!sopts->slow)) { + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { /* Getting tcache ptr unconditionally. 
*/ tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); - } else { + } else if (is_alloc || + likely(tsd_reentrancy_level_get(tsd) == 0)) { tcache = tcache_get(tsd); + } else { + tcache = NULL; } - } else if (dopts->tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; } else { - tcache = tcaches_get(tsd, dopts->tcache_ind); + /* + * Should not specify tcache on deallocation path when being + * reentrant. + */ + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } } + return tcache; +} + +/* ind is ignored if dopts->alignment > 0. */ +JEMALLOC_ALWAYS_INLINE void * +imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, + size_t size, size_t usize, szind_t ind) { + arena_t *arena; + + /* Fill in the tcache. */ + tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, + sopts->slow, /* is_alloc */ true); /* Fill in the arena. */ if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { @@ -2579,7 +2597,8 @@ je_malloc(size_t size) { } assert(tsd_fast(tsd)); - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ true); cache_bin_t *bin = &tcache->bins[ind]; bool tcache_success; void *ret; @@ -2839,22 +2858,20 @@ free_default(void *ptr) { tsd_t *tsd = tsd_fetch_min(); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; if (likely(tsd_fast(tsd))) { - tsd_assert_fast(tsd); - /* Unconditionally get tcache ptr on fast path. */ - tcache = tsd_tcachep_get(tsd); - ifree(tsd, ptr, tcache, false); + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ false, + /* is_alloc */ false); + ifree(tsd, ptr, tcache, /* slow */ false); } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ true, + /* is_alloc */ false); uintptr_t args_raw[3] = {(uintptr_t)ptr}; hook_invoke_dalloc(hook_dalloc_free, ptr, args_raw); - ifree(tsd, ptr, tcache, true); + ifree(tsd, ptr, tcache, /* slow */ true); } + check_entry_exit_locking(tsd_tsdn(tsd)); } } @@ -2912,7 +2929,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; /* @@ -3088,6 +3106,17 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); * Begin non-standard functions. 
*/ +JEMALLOC_ALWAYS_INLINE unsigned +mallocx_tcache_get(int flags) { + if (likely((flags & MALLOCX_TCACHE_MASK) == 0)) { + return TCACHE_IND_AUTOMATIC; + } else if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { + return TCACHE_IND_NONE; + } else { + return MALLOCX_TCACHE_GET(flags); + } +} + #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API #define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y @@ -3136,16 +3165,7 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.zero = MALLOCX_ZERO_GET(flags); - if ((flags & MALLOCX_TCACHE_MASK) != 0) { - if ((flags & MALLOCX_TCACHE_MASK) - == MALLOCX_TCACHE_NONE) { - dopts.tcache_ind = TCACHE_IND_NONE; - } else { - dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); - } - } else { - dopts.tcache_ind = TCACHE_IND_AUTOMATIC; - } + dopts.tcache_ind = mallocx_tcache_get(flags); if ((flags & MALLOCX_ARENA_MASK) != 0) dopts.arena_ind = MALLOCX_ARENA_GET(flags); @@ -3187,16 +3207,7 @@ je_mallocx(size_t size, int flags) { dopts.zero = MALLOCX_ZERO_GET(flags); - if ((flags & MALLOCX_TCACHE_MASK) != 0) { - if ((flags & MALLOCX_TCACHE_MASK) - == MALLOCX_TCACHE_NONE) { - dopts.tcache_ind = TCACHE_IND_NONE; - } else { - dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); - } - } else { - dopts.tcache_ind = TCACHE_IND_AUTOMATIC; - } + dopts.tcache_ind = mallocx_tcache_get(flags); if ((flags & MALLOCX_ARENA_MASK) != 0) dopts.arena_ind = MALLOCX_ARENA_GET(flags); @@ -3292,7 +3303,6 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); arena_t *arena; - tcache_t *tcache; assert(ptr != NULL); assert(size != 0); @@ -3312,15 +3322,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { arena = NULL; } - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - tcache = tcache_get(tsd); - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, + /* slow */ true, /* is_alloc */ true); emap_alloc_ctx_t alloc_ctx; emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, @@ -3400,19 +3404,14 @@ do_realloc_nonnull_zero(void *ptr) { return do_rallocx(ptr, 1, MALLOCX_TCACHE_NONE, true); } else if (opt_zero_realloc_action == zero_realloc_action_free) { UTRACE(ptr, 0, 0); - tcache_t *tcache; tsd_t *tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ true, + /* is_alloc */ false); uintptr_t args[3] = {(uintptr_t)ptr, 0}; hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); - ifree(tsd, ptr, tcache, true); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3688,28 +3687,9 @@ je_dallocx(void *ptr, int flags) { bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - /* Not allowed to be reentrant and specify a custom tcache. 
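The tcache_get_from_ind() and mallocx_tcache_get() helpers consolidated above replace per-call-site branching with one mapping from a tcache index (automatic, none, or explicit), plus the fast-path and alloc-vs-dealloc context, to a tcache pointer. Here is a self-contained sketch with stub types; the reentrancy rule on the deallocation path is the detail worth preserving.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { int id; } toy_tcache_t;

#define TOY_TCACHE_IND_AUTOMATIC	((unsigned)-1)
#define TOY_TCACHE_IND_NONE		((unsigned)-2)

static toy_tcache_t thread_tcache = {0};
static toy_tcache_t explicit_tcaches[4] = {{1}, {2}, {3}, {4}};
static int reentrancy_level = 0;

static toy_tcache_t *
toy_tcache_get_from_ind(unsigned ind, bool slow, bool is_alloc) {
	if (ind == TOY_TCACHE_IND_AUTOMATIC) {
		if (!slow) {
			return &thread_tcache;	/* unconditional on the fast path */
		}
		/* Reentrant deallocation must not touch the tcache. */
		return (is_alloc || reentrancy_level == 0) ?
		    &thread_tcache : NULL;
	}
	if (ind == TOY_TCACHE_IND_NONE) {
		return NULL;
	}
	return &explicit_tcaches[ind];	/* explicit index; no bounds check in this toy */
}

int
main(void) {
	toy_tcache_t *a = toy_tcache_get_from_ind(TOY_TCACHE_IND_AUTOMATIC,
	    /* slow */ false, /* is_alloc */ true);
	toy_tcache_t *b = toy_tcache_get_from_ind(2, /* slow */ true,
	    /* is_alloc */ false);
	printf("auto -> %d, explicit -> %d\n", a->id, b->id);
	return 0;
}
```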
*/ - assert(tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, + /* is_alloc */ false); UTRACE(ptr, 0, 0); if (likely(fast)) { @@ -3746,28 +3726,9 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - /* Not allowed to be reentrant and specify a custom tcache. */ - assert(tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, + /* is_alloc */ false); UTRACE(ptr, 0, 0); if (likely(fast)) { -- cgit v0.12 From e128b170a0b884aa34ca7fe3f61e89fc54fce918 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:17:54 -0700 Subject: Do not fallback to auto arena when manual arena is requested --- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index aacec7b..5d7c266 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2164,6 +2164,10 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, arena = NULL; } else { arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); + if (unlikely(arena == NULL) && + dopts->arena_ind >= narenas_auto) { + return NULL; + } } if (unlikely(dopts->alignment != 0)) { @@ -3315,7 +3319,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(arena == NULL)) { + if (unlikely(arena == NULL) && arena_ind >= narenas_auto) { goto label_oom; } } else { -- cgit v0.12 From 24bbf376cee49691ff734eb5d0415e14fbbe72ca Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:49:41 -0700 Subject: Unify arena flag reading and selection --- src/jemalloc.c | 69 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d7c266..573118e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2144,30 +2144,37 @@ tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { return tcache; } +/* Return true if a manual arena is specified and arena_get() OOMs. 
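The hunks above make an explicitly requested (manual) arena a hard requirement: if arena_get() cannot produce it, the allocation fails rather than silently falling back to an automatic arena, and arena_get_from_ind() then centralizes that decision. A toy version of the logic follows, with invented names and a fixed arena table.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TOY_ARENA_IND_AUTOMATIC	((unsigned)-1)
#define TOY_NARENAS_AUTO	4	/* stand-in for narenas_auto */

typedef struct { unsigned ind; } toy_arena_t;
static toy_arena_t toy_arenas[TOY_NARENAS_AUTO] = {{0}, {1}, {2}, {3}};

static toy_arena_t *
toy_arena_get(unsigned ind) {
	/* Pretend manual arenas (ind >= TOY_NARENAS_AUTO) were never created. */
	return ind < TOY_NARENAS_AUTO ? &toy_arenas[ind] : NULL;
}

/* Returns true on error: a manual arena was requested but is unavailable. */
static bool
toy_arena_get_from_ind(unsigned ind, toy_arena_t **arena_p) {
	if (ind == TOY_ARENA_IND_AUTOMATIC) {
		*arena_p = NULL;	/* defer the choice, as the real code does */
		return false;
	}
	*arena_p = toy_arena_get(ind);
	if (*arena_p == NULL && ind >= TOY_NARENAS_AUTO) {
		return true;		/* do not fall back to an automatic arena */
	}
	return false;
}

int
main(void) {
	toy_arena_t *arena;
	printf("auto ok = %d\n",
	    !toy_arena_get_from_ind(TOY_ARENA_IND_AUTOMATIC, &arena));
	printf("manual 7 ok = %d\n", !toy_arena_get_from_ind(7, &arena));
	return 0;
}
```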
*/ +JEMALLOC_ALWAYS_INLINE bool +arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { + if (arena_ind == ARENA_IND_AUTOMATIC) { + /* + * In case of automatic arena management, we defer arena + * computation until as late as we can, hoping to fill the + * allocation out of the tcache. + */ + *arena_p = NULL; + } else { + *arena_p = arena_get(tsd_tsdn(tsd), arena_ind, true); + if (unlikely(*arena_p == NULL) && arena_ind >= narenas_auto) { + return true; + } + } + return false; +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind) { - arena_t *arena; - /* Fill in the tcache. */ tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); /* Fill in the arena. */ - if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { - /* - * In case of automatic arena management, we defer arena - * computation until as late as we can, hoping to fill the - * allocation out of the tcache. - */ - arena = NULL; - } else { - arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); - if (unlikely(arena == NULL) && - dopts->arena_ind >= narenas_auto) { - return NULL; - } + arena_t *arena; + if (arena_get_from_ind(tsd, dopts->arena_ind, &arena)) { + return NULL; } if (unlikely(dopts->alignment != 0)) { @@ -3121,6 +3128,15 @@ mallocx_tcache_get(int flags) { } } +JEMALLOC_ALWAYS_INLINE unsigned +mallocx_arena_get(int flags) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { + return MALLOCX_ARENA_GET(flags); + } else { + return ARENA_IND_AUTOMATIC; + } +} + #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API #define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y @@ -3166,13 +3182,9 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.item_size = size; if (unlikely(flags != 0)) { dopts.alignment = MALLOCX_ALIGN_GET(flags); - dopts.zero = MALLOCX_ZERO_GET(flags); - dopts.tcache_ind = mallocx_tcache_get(flags); - - if ((flags & MALLOCX_ARENA_MASK) != 0) - dopts.arena_ind = MALLOCX_ARENA_GET(flags); + dopts.arena_ind = mallocx_arena_get(flags); } imalloc(&sopts, &dopts); @@ -3208,13 +3220,9 @@ je_mallocx(size_t size, int flags) { dopts.item_size = size; if (unlikely(flags != 0)) { dopts.alignment = MALLOCX_ALIGN_GET(flags); - dopts.zero = MALLOCX_ZERO_GET(flags); - dopts.tcache_ind = mallocx_tcache_get(flags); - - if ((flags & MALLOCX_ARENA_MASK) != 0) - dopts.arena_ind = MALLOCX_ARENA_GET(flags); + dopts.arena_ind = mallocx_arena_get(flags); } imalloc(&sopts, &dopts); @@ -3316,14 +3324,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(arena == NULL) && arena_ind >= narenas_auto) { - goto label_oom; - } - } else { - arena = NULL; + unsigned arena_ind = mallocx_arena_get(flags); + if (arena_get_from_ind(tsd, arena_ind, &arena)) { + goto label_oom; } unsigned tcache_ind = mallocx_tcache_get(flags); -- cgit v0.12 From a795b1932780503cf5422920975a1c38994c7581 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 16:15:06 -0700 Subject: Remove beginning define in source files ``` sed -i "/^#define JEMALLOC_[A-Z_]*_C_$/d" src/*.c; ``` --- src/arena.c | 1 - src/background_thread.c | 1 - src/base.c | 1 - src/bitmap.c | 1 - src/buf_writer.c | 1 - src/ckh.c | 
1 - src/counter.c | 1 - src/ctl.c | 1 - src/extent_dss.c | 1 - src/extent_mmap.c | 1 - src/hash.c | 1 - src/large.c | 1 - src/malloc_io.c | 1 - src/mutex.c | 1 - src/mutex_pool.c | 1 - src/pages.c | 1 - src/prng.c | 1 - src/prof.c | 1 - src/prof_data.c | 1 - src/prof_log.c | 1 - src/prof_recent.c | 1 - src/rtree.c | 1 - src/stats.c | 1 - src/tcache.c | 1 - src/thread_event.c | 1 - src/ticker.c | 1 - src/tsd.c | 1 - src/witness.c | 1 - 28 files changed, 28 deletions(-) diff --git a/src/arena.c b/src/arena.c index b61d373..2a3af5c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,4 +1,3 @@ -#define JEMALLOC_ARENA_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/background_thread.c b/src/background_thread.c index 6b68053..db11405 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BACKGROUND_THREAD_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/base.c b/src/base.c index ebb42da..d3732ba 100644 --- a/src/base.c +++ b/src/base.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BASE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/bitmap.c b/src/bitmap.c index 468b317..0ccedc5 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BITMAP_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/buf_writer.c b/src/buf_writer.c index 06a2735..7c6f794 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BUF_WRITER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ckh.c b/src/ckh.c index 1bf6df5..9441fba 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -34,7 +34,6 @@ * respectively. 
* ******************************************************************************/ -#define JEMALLOC_CKH_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ckh.h" diff --git a/src/counter.c b/src/counter.c index 71eda69..8f1ae3a 100644 --- a/src/counter.c +++ b/src/counter.c @@ -1,4 +1,3 @@ -#define JEMALLOC_COUNTER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ctl.c b/src/ctl.c index 24d9eb3..24c959c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,4 +1,3 @@ -#define JEMALLOC_CTL_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/extent_dss.c b/src/extent_dss.c index 17a0822..81161b3 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,4 +1,3 @@ -#define JEMALLOC_EXTENT_DSS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 17fd1c8..5f0ee2d 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,4 +1,3 @@ -#define JEMALLOC_EXTENT_MMAP_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/hash.c b/src/hash.c index 7b2bdc2..3a26b39 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,3 +1,2 @@ -#define JEMALLOC_HASH_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/large.c b/src/large.c index cc3e727..3ea08be 100644 --- a/src/large.c +++ b/src/large.c @@ -1,4 +1,3 @@ -#define JEMALLOC_LARGE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/malloc_io.c b/src/malloc_io.c index d2879bb..f5004f0 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MALLOC_IO_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/mutex.c b/src/mutex.c index bffcfb5..83d9ce7 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/mutex_pool.c b/src/mutex_pool.c index f24d10e..d7861dc 100644 --- a/src/mutex_pool.c +++ b/src/mutex_pool.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MUTEX_POOL_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/pages.c b/src/pages.c index 62e84f0..9413d87 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/pages.h" diff --git a/src/prng.c b/src/prng.c index 83c04bf..3a26b39 100644 --- a/src/prng.c +++ b/src/prng.c @@ -1,3 +1,2 @@ -#define JEMALLOC_PRNG_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof.c b/src/prof.c index db895f8..8ab6893 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof_data.c b/src/prof_data.c index fe9ef15..49cc6ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include 
"jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof_log.c b/src/prof_log.c index 00c7659..bda01d0 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index 9af753f..426f62e 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_RECENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/rtree.c b/src/rtree.c index 07a4e9a..6496b5a 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,4 +1,3 @@ -#define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/stats.c b/src/stats.c index fb88e5a..407b60c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,4 +1,3 @@ -#define JEMALLOC_STATS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/tcache.c b/src/tcache.c index ff42884..b73fd0d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,4 +1,3 @@ -#define JEMALLOC_TCACHE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/thread_event.c b/src/thread_event.c index 99a188d..bb91baa 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -1,4 +1,3 @@ -#define JEMALLOC_THREAD_EVENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ticker.c b/src/ticker.c index d7b8cd2..3a26b39 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,3 +1,2 @@ -#define JEMALLOC_TICKER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/tsd.c b/src/tsd.c index cc1b3ac..0dd4036 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,4 +1,3 @@ -#define JEMALLOC_TSD_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/witness.c b/src/witness.c index e9ddf59..4474af0 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,4 +1,3 @@ -#define JEMALLOC_WITNESS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -- cgit v0.12 From 092fcac0b4b3854c12c51d22174df00303a3fe6a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 08:58:22 -0700 Subject: Remove unnecessary source files --- Makefile.in | 3 --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 3 --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 9 --------- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 3 --- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 9 --------- src/hash.c | 2 -- src/prng.c | 2 -- src/ticker.c | 2 -- 8 files changed, 33 deletions(-) delete mode 100644 src/hash.c delete mode 100644 src/prng.c delete mode 100644 src/ticker.c diff --git a/Makefile.in b/Makefile.in index 2f3fea1..35b4a05 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,7 +117,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ - $(srcroot)src/hash.c \ $(srcroot)src/hook.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ @@ -130,7 +129,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ - 
$(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ @@ -143,7 +141,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ - $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d50fa88..bbe814b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -57,7 +57,6 @@ - @@ -71,7 +70,6 @@ - @@ -84,7 +82,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 94db8c0..6f7027b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -55,9 +55,6 @@ Source Files - - Source Files - Source Files @@ -97,9 +94,6 @@ Source Files - - Source Files - Source Files @@ -136,9 +130,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 337dcfe..ae60133 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -57,7 +57,6 @@ - @@ -71,7 +70,6 @@ - @@ -84,7 +82,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 94db8c0..6f7027b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -55,9 +55,6 @@ Source Files - - Source Files - Source Files @@ -97,9 +94,6 @@ Source Files - - Source Files - Source Files @@ -136,9 +130,6 @@ Source Files - - Source Files - Source Files diff --git a/src/hash.c b/src/hash.c deleted file mode 100644 index 3a26b39..0000000 --- a/src/hash.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prng.c b/src/prng.c deleted file mode 100644 index 3a26b39..0000000 --- a/src/prng.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ticker.c b/src/ticker.c deleted file mode 100644 index 3a26b39..0000000 --- a/src/ticker.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" -- cgit v0.12 From 25e43c60223c169ce7dc66982f9472aa6e33306b Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Thu, 18 Jun 2020 15:41:56 -0700 Subject: Witness: Make ranks an enum. This lets us avoid having to increment a bunch of values manually every time we add a new sort of lock. --- include/jemalloc/internal/witness.h | 120 ++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 58f7266..652afe6 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -7,62 +7,65 @@ /* LOCK RANKS */ /******************************************************************************/ -/* - * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness - * machinery. 
- */ - -#define WITNESS_RANK_OMIT 0U - -#define WITNESS_RANK_MIN 1U - -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_TCACHES 2U -#define WITNESS_RANK_ARENAS 3U - -#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U - -#define WITNESS_RANK_PROF_DUMP 5U -#define WITNESS_RANK_PROF_BT2GCTX 6U -#define WITNESS_RANK_PROF_TDATAS 7U -#define WITNESS_RANK_PROF_TDATA 8U -#define WITNESS_RANK_PROF_LOG 9U -#define WITNESS_RANK_PROF_GCTX 10U -#define WITNESS_RANK_PROF_RECENT_DUMP 11U -#define WITNESS_RANK_BACKGROUND_THREAD 12U - -/* - * Used as an argument to witness_assert_depth_to_rank() in order to validate - * depth excluding non-core locks with lower ranks. Since the rank argument to - * witness_assert_depth_to_rank() is inclusive rather than exclusive, this - * definition can have the same value as the minimally ranked core lock. - */ -#define WITNESS_RANK_CORE 13U - -#define WITNESS_RANK_DECAY 13U -#define WITNESS_RANK_TCACHE_QL 14U -#define WITNESS_RANK_EXTENT_GROW 15U -#define WITNESS_RANK_EXTENTS 16U -#define WITNESS_RANK_EDATA_CACHE 17U - -#define WITNESS_RANK_EMAP 18U -#define WITNESS_RANK_RTREE 19U -#define WITNESS_RANK_BASE 20U -#define WITNESS_RANK_ARENA_LARGE 21U -#define WITNESS_RANK_HOOK 22U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF -#define WITNESS_RANK_COUNTER_ACCUM WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF +enum witness_rank_e { + /* + * Order matters within this enum listing -- higher valued locks can + * only be acquired after lower-valued ones. We use the + * auto-incrementing-ness of enum values to enforce this. + */ + + /* + * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the + * witness machinery. + */ + WITNESS_RANK_OMIT, + WITNESS_RANK_MIN, + WITNESS_RANK_INIT = WITNESS_RANK_MIN, + WITNESS_RANK_CTL, + WITNESS_RANK_TCACHES, + WITNESS_RANK_ARENAS, + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + WITNESS_RANK_PROF_DUMP, + WITNESS_RANK_PROF_BT2GCTX, + WITNESS_RANK_PROF_TDATAS, + WITNESS_RANK_PROF_TDATA, + WITNESS_RANK_PROF_LOG, + WITNESS_RANK_PROF_GCTX, + WITNESS_RANK_PROF_RECENT_DUMP, + WITNESS_RANK_BACKGROUND_THREAD, + /* + * Used as an argument to witness_assert_depth_to_rank() in order to + * validate depth excluding non-core locks with lower ranks. Since the + * rank argument to witness_assert_depth_to_rank() is inclusive rather + * than exclusive, this definition can have the same value as the + * minimally ranked core lock. 
+ */ + WITNESS_RANK_CORE, + WITNESS_RANK_DECAY = WITNESS_RANK_CORE, + WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_EXTENTS, + WITNESS_RANK_EDATA_CACHE, + + WITNESS_RANK_EMAP, + WITNESS_RANK_RTREE, + WITNESS_RANK_BASE, + WITNESS_RANK_ARENA_LARGE, + WITNESS_RANK_HOOK, + + WITNESS_RANK_LEAF=0x1000, + WITNESS_RANK_BIN = WITNESS_RANK_LEAF, + WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, + WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, + WITNESS_RANK_DSS = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_ACTIVE = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_DUMP_FILENAME = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_GDUMP = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_NEXT_THR_UID = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF, +}; +typedef enum witness_rank_e witness_rank_t; /******************************************************************************/ /* PER-WITNESS DATA */ @@ -74,7 +77,6 @@ #endif typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; typedef ql_head(witness_t) witness_list_t; typedef int witness_comp_t (const witness_t *, void *, const witness_t *, void *); @@ -84,8 +86,8 @@ struct witness_s { const char *name; /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses - * must be acquired in order of increasing rank. + * Witness rank, where 0 is lowest and WITNESS_RANK_LEAF is highest. + * Witnesses must be acquired in order of increasing rank. */ witness_rank_t rank; -- cgit v0.12 From d460333efb22466713dd646b3947bbf0f868b02d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 15:16:53 -0700 Subject: Improve naming for prof system thread name option --- Makefile.in | 2 +- include/jemalloc/internal/prof_externs.h | 6 +-- src/ctl.c | 15 +++---- src/jemalloc.c | 5 +-- src/prof.c | 18 ++++---- test/unit/mallctl.c | 2 +- test/unit/prof_sys_thread_name.c | 75 ++++++++++++++++++++++++++++++++ test/unit/prof_sys_thread_name.sh | 5 +++ test/unit/prof_use_sys_thread_name.c | 75 -------------------------------- test/unit/prof_use_sys_thread_name.sh | 5 --- 10 files changed, 103 insertions(+), 105 deletions(-) create mode 100644 test/unit/prof_sys_thread_name.c create mode 100644 test/unit/prof_sys_thread_name.sh delete mode 100644 test/unit/prof_use_sys_thread_name.c delete mode 100644 test/unit/prof_use_sys_thread_name.sh diff --git a/Makefile.in b/Makefile.in index 35b4a05..fd52ffc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,7 +232,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ - $(srcroot)test/unit/prof_use_sys_thread_name.c \ + $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index b433965..6021cf4 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -29,7 +29,7 @@ extern ssize_t opt_prof_recent_alloc_max; extern malloc_mutex_t prof_recent_alloc_mtx; /* Whether to use thread name provided by the system or by mallctl. */ -extern bool opt_prof_experimental_use_sys_thread_name; +extern bool opt_prof_sys_thread_name; /* Accessed via prof_active_[gs]et{_unlocked,}(). 
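The enum conversion above leans on C's auto-incrementing enumerators, so inserting a new lock rank no longer means hand-renumbering every later constant, and the acquisition order can still be checked at compile time. A tiny stand-alone illustration with invented rank names:

```c
#include <stdio.h>

enum toy_rank_e {
	TOY_RANK_OMIT,			/* ignored by the checker */
	TOY_RANK_MIN,
	TOY_RANK_INIT = TOY_RANK_MIN,
	TOY_RANK_CTL,
	/* A new rank can be added here without touching the values below. */
	TOY_RANK_ARENAS,
	TOY_RANK_LEAF = 0x1000,		/* leaves share one sentinel rank */
	TOY_RANK_BIN = TOY_RANK_LEAF,
};
typedef enum toy_rank_e toy_rank_t;

/* Locks must be acquired in order of increasing rank. */
_Static_assert(TOY_RANK_CTL < TOY_RANK_ARENAS, "rank order violated");

int
main(void) {
	toy_rank_t r = TOY_RANK_ARENAS;
	printf("TOY_RANK_ARENAS = %d\n", (int)r);
	return 0;
}
```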
*/ extern bool prof_active; @@ -90,8 +90,8 @@ uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. */ -typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); -extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef int (prof_dump_open_t)(bool, const char *); diff --git a/src/ctl.c b/src/ctl.c index 24c959c..5cba9af 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -127,7 +127,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) -CTL_PROTO(opt_prof_experimental_use_sys_thread_name) +CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) @@ -382,11 +382,10 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, - {NAME("prof_experimental_use_sys_thread_name"), - CTL(opt_prof_experimental_use_sys_thread_name)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)}, - {NAME("prof_time_resolution"), CTL(opt_prof_time_res)} + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; static const ctl_named_node_t tcache_node[] = { @@ -1852,8 +1851,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) -CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, - opt_prof_experimental_use_sys_thread_name, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, + bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) CTL_RO_NL_GEN(opt_zero_realloc, diff --git a/src/jemalloc.c b/src/jemalloc.c index 573118e..b468d82 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1495,9 +1495,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) - CONF_HANDLE_BOOL( - opt_prof_experimental_use_sys_thread_name, - "prof_experimental_use_sys_thread_name") + CONF_HANDLE_BOOL(opt_prof_sys_thread_name, + "prof_sys_thread_name") if (CONF_MATCH("prof_time_resolution")) { if (CONF_MATCH_VALUE("default")) { opt_prof_time_res = diff --git a/src/prof.c b/src/prof.c index 8ab6893..5e29f40 100644 --- a/src/prof.c +++ b/src/prof.c @@ -47,7 +47,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; -bool opt_prof_experimental_use_sys_thread_name = false; +bool opt_prof_sys_thread_name = false; /* Accessed via prof_sample_event_handler(). 
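prof_sys_thread_name_read above stays behind the JET_MUTABLE indirection so unit tests can substitute the system call, which the renamed test/unit/prof_sys_thread_name.c then does. Below is a portable sketch of that test-injection pattern with invented names; the real hook wraps pthread_getname_np() where available.

```c
#include <errno.h>
#include <stdio.h>
#include <string.h>

static int
real_thread_name_read(char *buf, size_t limit) {
	/* Stand-in for pthread_getname_np(); pretend the OS has no support. */
	(void)buf; (void)limit;
	return ENOSYS;
}

/* The hook that tests may overwrite, analogous to a JET_MUTABLE symbol. */
static int (*thread_name_read)(char *, size_t) = real_thread_name_read;

static int
fake_thread_name_read(char *buf, size_t limit) {
	strncpy(buf, "worker-1", limit - 1);
	buf[limit - 1] = '\0';
	return 0;
}

int
main(void) {
	char name[16] = "";
	printf("real hook -> %d\n", thread_name_read(name, sizeof(name)));
	thread_name_read = fake_thread_name_read;	/* test injection */
	thread_name_read(name, sizeof(name));
	printf("fake hook -> \"%s\"\n", name);
	return 0;
}
```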
*/ static counter_accum_t prof_idump_accumulated; @@ -197,21 +197,21 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { } static int -prof_read_sys_thread_name_impl(char *buf, size_t limit) { +prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP return pthread_getname_np(pthread_self(), buf, limit); #else return ENOSYS; #endif } -prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = - prof_read_sys_thread_name_impl; +prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = + prof_sys_thread_name_read_impl; static void -prof_fetch_sys_thread_name(tsd_t *tsd) { +prof_sys_thread_name_fetch(tsd_t *tsd) { #define THREAD_NAME_MAX_LEN 16 char buf[THREAD_NAME_MAX_LEN]; - if (!prof_read_sys_thread_name(buf, THREAD_NAME_MAX_LEN)) { + if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { prof_thread_name_set_impl(tsd, buf); } #undef THREAD_NAME_MAX_LEN @@ -220,8 +220,8 @@ prof_fetch_sys_thread_name(tsd_t *tsd) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - if (opt_prof_experimental_use_sys_thread_name) { - prof_fetch_sys_thread_name(tsd); + if (opt_prof_sys_thread_name) { + prof_sys_thread_name_fetch(tsd); } edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, @@ -870,7 +870,7 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { - if (opt_prof_experimental_use_sys_thread_name) { + if (opt_prof_sys_thread_name) { return ENOENT; } else { return prof_thread_name_set_impl(tsd, thread_name); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 10d809f..3de5694 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -192,7 +192,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); - TEST_MALLCTL_OPT(bool, prof_experimental_use_sys_thread_name, prof); + TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c new file mode 100644 index 0000000..ec1e774 --- /dev/null +++ b/test/unit/prof_sys_thread_name.c @@ -0,0 +1,75 @@ +#include "test/jemalloc_test.h" + +static const char *test_thread_name = "test_name"; + +static int +test_prof_sys_thread_name_read_error(char *buf, size_t limit) { + return ENOSYS; +} + +static int +test_prof_sys_thread_name_read(char *buf, size_t limit) { + assert(strlen(test_thread_name) < limit); + strncpy(buf, test_thread_name, limit); + return 0; +} + +static int +test_prof_sys_thread_name_read_clear(char *buf, size_t limit) { + assert(limit > 0); + buf[0] = '\0'; + return 0; +} + +TEST_BEGIN(test_prof_sys_thread_name) { + test_skip_if(!config_prof); + + bool oldval; + size_t sz = sizeof(oldval); + assert_d_eq(mallctl("opt.prof_sys_thread_name", &oldval, &sz, NULL, 0), + 0, "mallctl failed"); + assert_true(oldval, "option was not set correctly"); + + const char *thread_name; + sz = sizeof(thread_name); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Initial thread name should be empty"); + + thread_name = test_thread_name; + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), + ENOENT, "mallctl write for thread name should fail"); + assert_ptr_eq(thread_name, test_thread_name, + "Thread name should not 
be touched"); + + prof_sys_thread_name_read = test_prof_sys_thread_name_read_error; + void *p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, "", + "Thread name should stay the same if the system call fails"); + + prof_sys_thread_name_read = test_prof_sys_thread_name_read; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, test_thread_name, + "Thread name should be changed if the system call succeeds"); + + prof_sys_thread_name_read = test_prof_sys_thread_name_read_clear; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Thread name should be updated if the " + "system call returns a different name"); +} +TEST_END + +int +main(void) { + return test( + test_prof_sys_thread_name); +} diff --git a/test/unit/prof_sys_thread_name.sh b/test/unit/prof_sys_thread_name.sh new file mode 100644 index 0000000..281cf9a --- /dev/null +++ b/test/unit/prof_sys_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_sys_thread_name:true" +fi diff --git a/test/unit/prof_use_sys_thread_name.c b/test/unit/prof_use_sys_thread_name.c deleted file mode 100644 index 60cb55b..0000000 --- a/test/unit/prof_use_sys_thread_name.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "test/jemalloc_test.h" - -static const char *test_thread_name = "test_name"; - -static int -test_prof_read_sys_thread_name_error(char *buf, size_t limit) { - return ENOSYS; -} - -static int -test_prof_read_sys_thread_name(char *buf, size_t limit) { - assert(strlen(test_thread_name) < limit); - strncpy(buf, test_thread_name, limit); - return 0; -} - -static int -test_prof_read_sys_thread_name_clear(char *buf, size_t limit) { - assert(limit > 0); - buf[0] = '\0'; - return 0; -} - -TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { - test_skip_if(!config_prof); - - bool oldval; - size_t sz = sizeof(oldval); - assert_d_eq(mallctl("opt.prof_experimental_use_sys_thread_name", - &oldval, &sz, NULL, 0), 0, "mallctl failed"); - assert_true(oldval, "option was not set correctly"); - - const char *thread_name; - sz = sizeof(thread_name); - assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, - "mallctl read for thread name should not fail"); - expect_str_eq(thread_name, "", "Initial thread name should be empty"); - - thread_name = test_thread_name; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), - ENOENT, "mallctl write for thread name should fail"); - assert_ptr_eq(thread_name, test_thread_name, - "Thread name should not be touched"); - - prof_read_sys_thread_name = test_prof_read_sys_thread_name_error; - void *p = malloc(1); - free(p); - assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, - "mallctl read for thread name should not fail"); - assert_str_eq(thread_name, "", - "Thread name should stay the same if the system call fails"); - - prof_read_sys_thread_name = test_prof_read_sys_thread_name; - p = malloc(1); - free(p); - assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, - "mallctl read for thread name should not fail"); - assert_str_eq(thread_name, test_thread_name, - "Thread name should be 
changed if the system call succeeds"); - - prof_read_sys_thread_name = test_prof_read_sys_thread_name_clear; - p = malloc(1); - free(p); - assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, - "mallctl read for thread name should not fail"); - expect_str_eq(thread_name, "", "Thread name should be updated if the " - "system call returns a different name"); -} -TEST_END - -int -main(void) { - return test( - test_prof_experimental_use_sys_thread_name); -} diff --git a/test/unit/prof_use_sys_thread_name.sh b/test/unit/prof_use_sys_thread_name.sh deleted file mode 100644 index 0e0e0d9..0000000 --- a/test/unit/prof_use_sys_thread_name.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_experimental_use_sys_thread_name:true" -fi -- cgit v0.12 From 537a4bedb4d4ae6238762df85ae1ad2bc8d0ff47 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 4 May 2020 14:58:25 -0700 Subject: Add a tool to examine random number distributions --- .gitignore | 5 + Makefile.in | 33 ++++- test/analyze/rand.c | 276 +++++++++++++++++++++++++++++++++++ test/include/test/jemalloc_test.h.in | 4 +- 4 files changed, 311 insertions(+), 7 deletions(-) create mode 100644 test/analyze/rand.c diff --git a/.gitignore b/.gitignore index 5ca0ad1..31cdbb8 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,11 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.[od] /test/unit/*.out +/test/analyze/[A-Za-z]* +!/test/analyze/[A-Za-z]*.* +/test/analyze/*.[od] +/test/analyze/*.out + /VERSION *.pdb diff --git a/Makefile.in b/Makefile.in index fd52ffc..4a0ef87 100644 --- a/Makefile.in +++ b/Makefile.in @@ -287,6 +287,7 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif +TESTS_ANALYZE := $(srcroot)test/analyze/rand.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ @@ -294,7 +295,8 @@ TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/sizes.c -TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) +TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ + $(TESTS_ANALYZE) $(TESTS_STRESS) PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) @@ -310,14 +312,19 @@ C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_TESTLIB_ANALYZE_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.analyze.$(O)) C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) -C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) +C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) \ + $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_ANALYZE_OBJS) \ + $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) +TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O)) 
TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \ + $(TESTS_STRESS_OBJS) TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc @@ -391,12 +398,15 @@ $(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c +$(C_TESTLIB_ANALYZE_OBJS): $(objroot)test/src/%.analyze.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST +$(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp @@ -416,7 +426,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h -$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h $(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) @@ -479,6 +489,10 @@ $(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $( @mkdir -p $(@D) $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) +$(objroot)test/analyze/%$(EXE): $(objroot)test/analyze/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_ANALYZE_OBJS) + @mkdir -p $(@D) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) @@ -559,13 +573,16 @@ endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: 
$(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) -tests: tests_unit tests_integration tests_stress +tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration +analyze_dir: + @mkdir -p $(objroot)test/analyze stress_dir: @mkdir -p $(objroot)test/stress check_dir: check_unit_dir check_integration_dir @@ -582,6 +599,12 @@ check_integration_decay: tests_integration check_integration_dir $(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) +analyze: tests_analyze analyze_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +else + $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +endif stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: check_unit check_integration check_integration_decay check_integration_prof diff --git a/test/analyze/rand.c b/test/analyze/rand.c new file mode 100644 index 0000000..a4ab49a --- /dev/null +++ b/test/analyze/rand.c @@ -0,0 +1,276 @@ +#include "test/jemalloc_test.h" + +/******************************************************************************/ + +/* + * General purpose tool for examining random number distributions. + * + * Input - + * (a) a random number generator, and + * (b) the buckets: + * (1) number of buckets, + * (2) width of each bucket, in log scale, + * (3) expected mean and stddev of the count of random numbers in each + * bucket, and + * (c) number of iterations to invoke the generator. + * + * The program generates the specified amount of random numbers, and assess how + * well they conform to the expectations: for each bucket, output - + * (a) the (given) expected mean and stddev, + * (b) the actual count and any interesting level of deviation: + * (1) ~68% buckets should show no interesting deviation, meaning a + * deviation less than stddev from the expectation; + * (2) ~27% buckets should show '+' / '-', meaning a deviation in the range + * of [stddev, 2 * stddev) from the expectation; + * (3) ~4% buckets should show '++' / '--', meaning a deviation in the + * range of [2 * stddev, 3 * stddev) from the expectation; and + * (4) less than 0.3% buckets should show more than two '+'s / '-'s. + * + * Technical remarks: + * (a) The generator is expected to output uint64_t numbers, so you might need + * to define a wrapper. + * (b) The buckets must be of equal width and the lowest bucket starts at + * [0, 2^lg_bucket_width - 1). + * (c) Any generated number >= n_bucket * 2^lg_bucket_width will be counted + * towards the last bucket; the expected mean and stddev provided should + * also reflect that. + * (d) The number of iterations is adviced to be determined so that the bucket + * with the minimal expected proportion gets a sufficient count. 
+ */ + +static void +fill(size_t a[], const size_t n, const size_t k) { + for (size_t i = 0; i < n; ++i) { + a[i] = k; + } +} + +static void +collect_buckets(uint64_t (*gen)(void *), void *opaque, size_t buckets[], + const size_t n_bucket, const size_t lg_bucket_width, const size_t n_iter) { + for (size_t i = 0; i < n_iter; ++i) { + uint64_t num = gen(opaque); + uint64_t bucket_id = num >> lg_bucket_width; + if (bucket_id >= n_bucket) { + bucket_id = n_bucket - 1; + } + ++buckets[bucket_id]; + } +} + +static void +print_buckets(const size_t buckets[], const size_t means[], + const size_t stddevs[], const size_t n_bucket) { + for (size_t i = 0; i < n_bucket; ++i) { + malloc_printf("%zu:\tmean = %zu,\tstddev = %zu,\tbucket = %zu", + i, means[i], stddevs[i], buckets[i]); + + /* Make sure there's no overflow. */ + assert(buckets[i] + stddevs[i] >= stddevs[i]); + assert(means[i] + stddevs[i] >= stddevs[i]); + + if (buckets[i] + stddevs[i] <= means[i]) { + malloc_write(" "); + for (size_t t = means[i] - buckets[i]; t >= stddevs[i]; + t -= stddevs[i]) { + malloc_write("-"); + } + } else if (buckets[i] >= means[i] + stddevs[i]) { + malloc_write(" "); + for (size_t t = buckets[i] - means[i]; t >= stddevs[i]; + t -= stddevs[i]) { + malloc_write("+"); + } + } + malloc_write("\n"); + } +} + +static void +bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], + const size_t means[], const size_t stddevs[], const size_t n_bucket, + const size_t lg_bucket_width, const size_t n_iter) { + for (size_t i = 1; i <= 3; ++i) { + malloc_printf("round %zu\n", i); + fill(buckets, n_bucket, 0); + collect_buckets(gen, opaque, buckets, n_bucket, + lg_bucket_width, n_iter); + print_buckets(buckets, means, stddevs, n_bucket); + } +} + +/* (Recommended) minimal bucket mean. */ +#define MIN_BUCKET_MEAN 10000 + +/******************************************************************************/ + +/* Uniform random number generator. */ + +typedef struct uniform_gen_arg_s uniform_gen_arg_t; +struct uniform_gen_arg_s { + uint64_t state; + const unsigned lg_range; +}; + +static uint64_t +uniform_gen(void *opaque) { + uniform_gen_arg_t *arg = (uniform_gen_arg_t *)opaque; + return prng_lg_range_u64(&arg->state, arg->lg_range); +} + +TEST_BEGIN(test_uniform) { +#define LG_N_BUCKET 5 +#define N_BUCKET (1 << LG_N_BUCKET) + +#define QUOTIENT_CEIL(n, d) (((n) - 1) / (d) + 1) + + const unsigned lg_range_test = 25; + + /* + * Mathematical tricks to guarantee that both mean and stddev are + * integers, and that the minimal bucket mean is at least + * MIN_BUCKET_MEAN. + */ + const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL( + MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2); + const size_t stddev = (N_BUCKET - 1) * q; + const size_t mean = N_BUCKET * stddev * q; + const size_t n_iter = N_BUCKET * mean; + + size_t means[N_BUCKET]; + fill(means, N_BUCKET, mean); + size_t stddevs[N_BUCKET]; + fill(stddevs, N_BUCKET, stddev); + + uniform_gen_arg_t arg = {(uint64_t)(uintptr_t)&lg_range_test, + lg_range_test}; + size_t buckets[N_BUCKET]; + assert_zu_ge(lg_range_test, LG_N_BUCKET, ""); + const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET; + + bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, + N_BUCKET, lg_bucket_width, n_iter); + +#undef LG_N_BUCKET +#undef N_BUCKET +#undef QUOTIENT_CEIL +} +TEST_END + +/******************************************************************************/ + +/* Geometric random number generator; compiled only when prof is on. 
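For the uniform generator above, each bucket's count is binomial, which is where the reference values come from: mean = n_iter * p and stddev = sqrt(n_iter * p * (1 - p)). A small worked example with arbitrary numbers (test_uniform itself picks its parameters so that both quantities come out as integers):

```c
#include <math.h>
#include <stdio.h>

int
main(void) {
	const double n_iter = 320000.0;
	const double p = 1.0 / 32.0;	/* 32 equally likely buckets */
	double mean = n_iter * p;
	double stddev = sqrt(n_iter * p * (1.0 - p));
	/* About 68% of bucket counts should fall within one stddev of the mean. */
	printf("mean = %.1f, stddev = %.1f\n", mean, stddev);
	return 0;
}
```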
*/ + +#ifdef JEMALLOC_PROF + +/* + * Fills geometric proportions and returns the minimal proportion. See + * comments in test_prof_sample for explanations for n_divide. + */ +static double +fill_geometric_proportions(double proportions[], const size_t n_bucket, + const size_t n_divide) { + assert(n_bucket > 0); + assert(n_divide > 0); + double x = 1.; + for (size_t i = 0; i < n_bucket; ++i) { + if (i == n_bucket - 1) { + proportions[i] = x; + } else { + double y = x * exp(-1. / n_divide); + proportions[i] = x - y; + x = y; + } + } + /* + * The minimal proportion is the smaller one of the last two + * proportions for geometric distribution. + */ + double min_proportion = proportions[n_bucket - 1]; + if (n_bucket >= 2 && proportions[n_bucket - 2] < min_proportion) { + min_proportion = proportions[n_bucket - 2]; + } + return min_proportion; +} + +static size_t +round_to_nearest(const double x) { + return (size_t)(x + .5); +} + +static void +fill_references(size_t means[], size_t stddevs[], const double proportions[], + const size_t n_bucket, const size_t n_iter) { + for (size_t i = 0; i < n_bucket; ++i) { + double x = n_iter * proportions[i]; + means[i] = round_to_nearest(x); + stddevs[i] = round_to_nearest(sqrt(x * (1. - proportions[i]))); + } +} + +static uint64_t +prof_sample_gen(void *opaque) { + return prof_sample_new_event_wait((tsd_t *)opaque) - 1; +} + +#endif /* JEMALLOC_PROF */ + +TEST_BEGIN(test_prof_sample) { + test_skip_if(!config_prof); +#ifdef JEMALLOC_PROF + +/* Number of divisions within [0, mean). */ +#define LG_N_DIVIDE 3 +#define N_DIVIDE (1 << LG_N_DIVIDE) + +/* Coverage of buckets in terms of multiples of mean. */ +#define LG_N_MULTIPLY 2 +#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) + + test_skip_if(!opt_prof); + + size_t lg_prof_sample_test = 25; + + size_t lg_prof_sample_orig = lg_prof_sample; + assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_test, + sizeof(size_t)), 0, ""); + malloc_printf("lg_prof_sample = %zu\n", lg_prof_sample_test); + + double proportions[N_GEO_BUCKET + 1]; + const double min_proportion = fill_geometric_proportions(proportions, + N_GEO_BUCKET + 1, N_DIVIDE); + const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN / + min_proportion); + size_t means[N_GEO_BUCKET + 1]; + size_t stddevs[N_GEO_BUCKET + 1]; + fill_references(means, stddevs, proportions, N_GEO_BUCKET + 1, n_iter); + + tsd_t *tsd = tsd_fetch(); + assert_ptr_not_null(tsd, ""); + size_t buckets[N_GEO_BUCKET + 1]; + assert_zu_ge(lg_prof_sample, LG_N_DIVIDE, ""); + const size_t lg_bucket_width = lg_prof_sample - LG_N_DIVIDE; + + bucket_analysis(prof_sample_gen, tsd, buckets, means, stddevs, + N_GEO_BUCKET + 1, lg_bucket_width, n_iter); + + assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_orig, + sizeof(size_t)), 0, ""); + +#undef LG_N_DIVIDE +#undef N_DIVIDE +#undef LG_N_MULTIPLY +#undef N_GEO_BUCKET + +#endif /* JEMALLOC_PROF */ +} +TEST_END + +/******************************************************************************/ + +int +main(void) { + return test_no_reentrancy( + test_uniform, + test_prof_sample); +} diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index e5d6306..ae67574 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -38,9 +38,9 @@ extern "C" { /******************************************************************************/ /* - * For unit tests, expose all public and private interfaces. 
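fill_geometric_proportions() above gives bucket i the mass e^(-i/n_divide) - e^(-(i+1)/n_divide) and folds the remaining tail into the last bucket, matching the exponential shape of the sampling interval. A quick stand-alone check that the proportions telescope to one; the bucket and divisor counts here are arbitrary.

```c
#include <math.h>
#include <stdio.h>

int
main(void) {
	const int n_divide = 3;
	const int n_bucket = 5;
	double x = 1.0, total = 0.0;
	for (int i = 0; i < n_bucket; i++) {
		double p;
		if (i == n_bucket - 1) {
			p = x;	/* tail mass goes to the last bucket */
		} else {
			double y = x * exp(-1.0 / n_divide);
			p = x - y;
			x = y;
		}
		total += p;
		printf("bucket %d: p = %.4f\n", i, p);
	}
	printf("total = %.4f\n", total);	/* sums to 1 */
	return 0;
}
```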
+ * For unit tests and analytics tests, expose all public and private interfaces. */ -#ifdef JEMALLOC_UNIT_TEST +#if defined(JEMALLOC_UNIT_TEST) || defined (JEMALLOC_ANALYZE_TEST) # define JEMALLOC_JET # define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_preamble.h" -- cgit v0.12 From d8cea8756242a3a50dde4baf4fb8bf38eddac55d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 25 Jun 2020 09:38:23 -0700 Subject: Move size inspections to test/analyze --- Makefile.in | 6 +++--- test/analyze/sizes.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++ test/stress/sizes.c | 53 ---------------------------------------------------- 3 files changed, 56 insertions(+), 56 deletions(-) create mode 100644 test/analyze/sizes.c delete mode 100644 test/stress/sizes.c diff --git a/Makefile.in b/Makefile.in index 4a0ef87..1c9e400 100644 --- a/Makefile.in +++ b/Makefile.in @@ -287,12 +287,12 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif -TESTS_ANALYZE := $(srcroot)test/analyze/rand.c +TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ + $(srcroot)test/analyze/sizes.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c \ - $(srcroot)test/stress/sizes.c + $(srcroot)test/stress/hookbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c new file mode 100644 index 0000000..44c9de5 --- /dev/null +++ b/test/analyze/sizes.c @@ -0,0 +1,53 @@ +#include "test/jemalloc_test.h" + +#include + +/* + * Print the sizes of various important core data structures. OK, I guess this + * isn't really a "stress" test, but it does give useful information about + * low-level performance characteristics, as the other things in this directory + * do. + */ + +static void +do_print(const char *name, size_t sz_bytes) { + const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", + "ZB"}; + size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); + + size_t ind = 0; + double sz = sz_bytes; + while (sz >= 1024 && ind < sizes_max - 1) { + sz /= 1024; + ind++; + } + if (ind == 0) { + printf("%-20s: %zu bytes\n", name, sz_bytes); + } else { + printf("%-20s: %f %s\n", name, sz, sizes[ind]); + } +} + +int +main() { +#define P(type) \ + do_print(#type, sizeof(type)) + P(arena_t); + P(arena_stats_t); + P(base_t); + P(decay_t); + P(edata_t); + P(ecache_t); + P(eset_t); + P(malloc_mutex_t); + P(prof_tctx_t); + P(prof_gctx_t); + P(prof_tdata_t); + P(rtree_t); + P(rtree_leaf_elm_t); + P(slab_data_t); + P(tcache_t); + P(tcache_slow_t); + P(tsd_t); +#undef P +} diff --git a/test/stress/sizes.c b/test/stress/sizes.c deleted file mode 100644 index 44c9de5..0000000 --- a/test/stress/sizes.c +++ /dev/null @@ -1,53 +0,0 @@ -#include "test/jemalloc_test.h" - -#include - -/* - * Print the sizes of various important core data structures. OK, I guess this - * isn't really a "stress" test, but it does give useful information about - * low-level performance characteristics, as the other things in this directory - * do. 
- */ - -static void -do_print(const char *name, size_t sz_bytes) { - const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", - "ZB"}; - size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); - - size_t ind = 0; - double sz = sz_bytes; - while (sz >= 1024 && ind < sizes_max - 1) { - sz /= 1024; - ind++; - } - if (ind == 0) { - printf("%-20s: %zu bytes\n", name, sz_bytes); - } else { - printf("%-20s: %f %s\n", name, sz, sizes[ind]); - } -} - -int -main() { -#define P(type) \ - do_print(#type, sizeof(type)) - P(arena_t); - P(arena_stats_t); - P(base_t); - P(decay_t); - P(edata_t); - P(ecache_t); - P(eset_t); - P(malloc_mutex_t); - P(prof_tctx_t); - P(prof_gctx_t); - P(prof_tdata_t); - P(rtree_t); - P(rtree_leaf_elm_t); - P(slab_data_t); - P(tcache_t); - P(tcache_slow_t); - P(tsd_t); -#undef P -} -- cgit v0.12 From f307b25804064eb26077f98b1481e6eb42f1dbad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 11:05:07 -0700 Subject: Only replace the dump file opening function in test --- include/jemalloc/internal/prof_externs.h | 4 ++-- src/prof_data.c | 15 ++++++++++----- test/unit/prof_accum.c | 6 +++--- test/unit/prof_gdump.c | 6 +++--- test/unit/prof_idump.c | 6 +++--- test/unit/prof_reset.c | 6 +++--- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6021cf4..2f9f2c9 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -94,8 +94,8 @@ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *JET_MUTABLE prof_dump_open; +typedef int (prof_dump_open_file_t)(const char *, int); +extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, diff --git a/src/prof_data.c b/src/prof_data.c index 49cc6ee..396cea0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -467,13 +467,19 @@ prof_bt_count(void) { } static int -prof_dump_open_impl(bool propagate_err, const char *filename) { +prof_dump_open_file_impl(const char *filename, int mode) { + return creat(filename, mode); +} +prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = + prof_dump_open_file_impl; + +static int +prof_dump_open(bool propagate_err, const char *filename) { int fd; - fd = creat(filename, 0644); + fd = prof_dump_open_file(filename, 0644); if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); + malloc_printf(": failed to open \"%s\"\n", filename); if (opt_abort) { abort(); } @@ -481,7 +487,6 @@ prof_dump_open_impl(bool propagate_err, const char *filename) { return fd; } -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; static bool prof_dump_flush(bool propagate_err) { diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 8dfa678..8fc5881 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -6,11 +6,11 @@ #define BT_COUNT_CHECK_INTERVAL 5 static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, 
-1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -62,7 +62,7 @@ TEST_BEGIN(test_idump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; for (i = 0; i < NTHREADS; i++) { thd_args[i] = i; diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 4c6afbd..6209255 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -3,13 +3,13 @@ static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; did_prof_dump_open = true; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -26,7 +26,7 @@ TEST_BEGIN(test_gdump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; p = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index dfcc0ff..b0c1bc2 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -5,7 +5,7 @@ static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; did_prof_dump_open = true; @@ -15,7 +15,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -38,7 +38,7 @@ TEST_BEGIN(test_idump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; p = mallocx(1, 0); diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index e643e54..29fa02b 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -276,7 +276,7 @@ TEST_END int main(void) { /* Intercept dumping prior to running any tests. */ - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; return test_no_reentrancy( test_prof_reset_basic, -- cgit v0.12 From 4bb4037dbe2450c985d09eabd29a1d8534e20641 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 19:46:18 -0700 Subject: Extract utility function for opening maps file --- src/prof_data.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 396cea0..62b650c 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -919,7 +919,7 @@ label_return: #ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int -prof_open_maps(const char *format, ...) { +prof_open_maps_internal(const char *format, ...) 
{ int mfd; va_list ap; char filename[PATH_MAX + 1]; @@ -941,26 +941,31 @@ prof_open_maps(const char *format, ...) { } #endif -static bool -prof_dump_maps(bool propagate_err) { - bool ret; +static int +prof_dump_open_maps() { int mfd; cassert(config_prof); #ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); + mfd = prof_open_maps_internal("/proc/curproc/map"); #elif defined(_WIN32) mfd = -1; // Not implemented #else - { - int pid = prof_getpid(); + int pid = prof_getpid(); - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } + mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps_internal("/proc/%d/maps", pid); } #endif + return mfd; +} + +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd = prof_dump_open_maps(); + if (mfd != -1) { ssize_t nread; -- cgit v0.12 From 21e44c45d994798d50df9fa77c905465a38a4675 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 19:57:06 -0700 Subject: Make maps file opening replaceable in test --- include/jemalloc/internal/prof_externs.h | 2 ++ src/prof_data.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 2f9f2c9..e5d6ff7 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,6 +98,8 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +typedef int (prof_dump_open_maps_t)(); +extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index 62b650c..b9b211d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -942,7 +942,7 @@ prof_open_maps_internal(const char *format, ...) 
{ #endif static int -prof_dump_open_maps() { +prof_dump_open_maps_impl() { int mfd; cassert(config_prof); @@ -960,6 +960,8 @@ prof_dump_open_maps() { #endif return mfd; } +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = + prof_dump_open_maps_impl; static bool prof_dump_maps(bool propagate_err) { -- cgit v0.12 From 7455813e5762c93fd2dcaf0672324dffa8aae5a2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 20:27:52 -0700 Subject: Make dump file writing replaceable in test --- include/jemalloc/internal/prof_externs.h | 2 ++ src/prof_data.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e5d6ff7..d644be6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -96,6 +96,8 @@ size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; +typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); diff --git a/src/prof_data.c b/src/prof_data.c index b9b211d..0de728b 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -488,6 +488,8 @@ prof_dump_open(bool propagate_err, const char *filename) { return fd; } +prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; + static bool prof_dump_flush(bool propagate_err) { bool ret = false; @@ -495,10 +497,11 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); if (err == -1) { if (!propagate_err) { - malloc_write(": write() failed during heap " + malloc_write(": failed to write during heap " "profile flush\n"); if (opt_abort) { abort(); -- cgit v0.12 From 354183b10d286876ef9811fd9e94758926e66927 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 12:03:12 -0700 Subject: Define prof dump buffer size centrally --- include/jemalloc/internal/prof_types.h | 7 ++++++- src/prof_data.c | 9 +-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 4abe5b5..98750f3 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -29,7 +29,12 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 +#ifndef JEMALLOC_PROF +/* Minimize memory bloat for non-prof builds. */ +# define PROF_DUMP_BUFSIZE 1 +#else +# define PROF_DUMP_BUFSIZE 65536 +#endif /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 diff --git a/src/prof_data.c b/src/prof_data.c index 0de728b..d5f5524 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,14 +59,7 @@ static prof_tdata_tree_t tdatas; * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. */ -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. 
*/ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static size_t prof_dump_buf_end; static int prof_dump_fd; -- cgit v0.12 From f541871f5df5d711df6fd13830496f86d72439ce Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 12:21:17 -0700 Subject: Reduce prof dump buffer size in debug build --- include/jemalloc/internal/prof_types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 98750f3..dbd758f 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -32,6 +32,9 @@ typedef struct prof_recent_s prof_recent_t; #ifndef JEMALLOC_PROF /* Minimize memory bloat for non-prof builds. */ # define PROF_DUMP_BUFSIZE 1 +#elif defined(JEMALLOC_DEBUG) +/* Use a small buffer size in debug build, mainly to facilitate testing. */ +# define PROF_DUMP_BUFSIZE 16 #else # define PROF_DUMP_BUFSIZE 65536 #endif -- cgit v0.12 From 5d292b56609ae2b85658f4c544b03d46b41e66be Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:00:37 -0700 Subject: Push error handling logic out of core dumping logic --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 2 +- src/prof_data.c | 340 +++++++++++-------------------- test/unit/prof_mdump.c | 214 +++++++++++++++++++ test/unit/prof_mdump.sh | 6 + test/unit/prof_reset.c | 7 +- 6 files changed, 346 insertions(+), 224 deletions(-) create mode 100644 test/unit/prof_mdump.c create mode 100644 test/unit/prof_mdump.sh diff --git a/Makefile.in b/Makefile.in index 1c9e400..87ddd33 100644 --- a/Makefile.in +++ b/Makefile.in @@ -228,6 +228,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ + $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d644be6..9a2b122 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,7 +98,7 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; diff --git a/src/prof_data.c b/src/prof_data.c index d5f5524..210b153 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -55,6 +55,20 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; +/* The following are needed for dumping and are protected by prof_dump_mtx. */ +/* + * Whether there has been an error in the dumping process, which could have + * happened either in file opening or in file writing. When an error has + * already occurred, we will stop further writing to the file. 
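 *
 * (Sketch of the resulting control flow, restating the code below rather
 * than adding to it: prof_dump_file() clears the flag, each stage records
 * failure instead of returning it, and the caller sees only the single flag.
 *
 *     prof_dump_error = false;
 *     prof_dump_open(filename);   flags an open failure
 *     prof_dump_header(...);      further file writes are skipped once the
 *     prof_dump_maps();           flag is set, so at most one error is kept
 *     prof_dump_close();
 *     return prof_dump_error;
 *
 * Whether that error is also printed (and possibly aborts) locally is
 * governed by the companion flag declared next.)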
+ */ +static bool prof_dump_error; +/* + * Whether error should be handled locally: if true, then we print out error + * message as well as abort (if opt_abort is true) when an error occurred, and + * we also report the error back to the caller in the end; if false, then we + * only report the error back to the caller in the end. + */ +static bool prof_dump_handle_error_locally; /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. @@ -459,6 +473,30 @@ prof_bt_count(void) { return bt_count; } +static void +prof_dump_check_possible_error(bool err_cond, const char *format, ...) { + assert(!prof_dump_error); + if (!err_cond) { + return; + } + + prof_dump_error = true; + if (!prof_dump_handle_error_locally) { + return; + } + + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + malloc_write(buf); + + if (opt_abort) { + abort(); + } +} + static int prof_dump_open_file_impl(const char *filename, int mode) { return creat(filename, mode); @@ -466,61 +504,37 @@ prof_dump_open_file_impl(const char *filename, int mode) { prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = prof_dump_open_file_impl; -static int -prof_dump_open(bool propagate_err, const char *filename) { - int fd; - - fd = prof_dump_open_file(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": failed to open \"%s\"\n", filename); - if (opt_abort) { - abort(); - } - } - - return fd; +static void +prof_dump_open(const char *filename) { + prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(prof_dump_fd == -1, + ": failed to open \"%s\"\n", filename); } prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; - +static void +prof_dump_flush() { cassert(config_prof); - - err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": failed to write during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; + if (!prof_dump_error) { + ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); + prof_dump_check_possible_error(err == -1, + ": failed to write during heap profile flush\n"); } prof_dump_buf_end = 0; - - return ret; } -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return ret; +static void +prof_dump_close() { + if (prof_dump_fd != -1) { + prof_dump_flush(); + close(prof_dump_fd); + } } -static bool -prof_dump_write(bool propagate_err, const char *s) { +static void +prof_dump_write(const char *s) { size_t i, slen, n; cassert(config_prof); @@ -530,9 +544,7 @@ prof_dump_write(bool propagate_err, const char *s) { while (i < slen) { /* Flush the buffer if it is full. */ if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } + prof_dump_flush(); } if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { @@ -547,23 +559,18 @@ prof_dump_write(bool propagate_err, const char *s) { i += n; } assert(i == slen); - - return false; } -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) 
{ - bool ret; +JEMALLOC_FORMAT_PRINTF(1, 2) +static void +prof_dump_printf(const char *format, ...) { va_list ap; char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; + prof_dump_write(buf); } static void @@ -630,17 +637,10 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -649,13 +649,11 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, + prof_dump_printf( " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } + tctx->dump_cnts.accumbytes); break; default: not_reached(); @@ -817,53 +815,37 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - + void *unused) { if (!tdata->dumping) { return NULL; } - if (prof_dump_printf(propagate_err, + prof_dump_printf( " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", tdata->thr_uid, tdata->cnt_summed.curobjs, tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, tdata->cnt_summed.accumbytes, (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } + (tdata->thread_name != NULL) ? 
tdata->thread_name : ""); return NULL; } -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" +static void +prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { + prof_dump_printf("heap_v2/%"FMTu64"\n" " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes); malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - +static void +prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, + prof_gctx_tree_t *gctxs) { cassert(config_prof); malloc_mutex_assert_owner(tsdn, gctx->lock); @@ -874,42 +856,21 @@ prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, assert(gctx->cnt_summed.curbytes == 0); assert(gctx->cnt_summed.accumobjs == 0); assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; + return; } - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } + prof_dump_write("@"); + for (unsigned i = 0; i < bt->len; i++) { + prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - if (prof_dump_printf(propagate_err, - "\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + prof_dump_printf( + "\n t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } - - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes); - ret = false; -label_return: - return ret; + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)tsdn); } #ifndef _WIN32 @@ -959,45 +920,26 @@ prof_dump_open_maps_impl() { prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = prof_dump_open_maps_impl; -static bool -prof_dump_maps(bool propagate_err) { - bool ret; +static void +prof_dump_maps() { int mfd = prof_dump_open_maps(); + if (mfd == -1) { + return; + } - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; + prof_dump_write("\nMAPPED_LIBRARIES:\n"); + ssize_t nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + prof_dump_flush(); } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). 
*/ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } + nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; + close(mfd); } /* @@ -1035,29 +977,13 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, #endif } -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; + tsdn_t *tsdn = (tsdn_t *)opaque; + malloc_mutex_lock(tsdn, gctx->lock); + prof_dump_gctx(tsdn, gctx, &gctx->bt, gctxs); + malloc_mutex_unlock(tsdn, gctx->lock); + return NULL; } static void @@ -1104,43 +1030,23 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, static bool prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + bool leakcheck, prof_tdata_t *tdata, const prof_cnt_t *cnt_all, prof_gctx_tree_t *gctxs) { - /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } + prof_dump_error = false; + prof_dump_handle_error_locally = !propagate_err; + /* Create dump file. */ + prof_dump_open(filename); /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - + prof_dump_header(tsd_tsdn(tsd), cnt_all); /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - + gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, (void *)tsd_tsdn(tsd)); /* Dump /proc//maps if possible. */ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } - - if (prof_dump_close(propagate_err)) { - return true; - } + prof_dump_maps(); + /* Close dump file. 
*/ + prof_dump_close(); - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; + return prof_dump_error; } bool @@ -1160,12 +1066,10 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, prof_gctx_tree_t gctxs; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, + tdata, &prof_tdata_merge_iter_arg.cnt_all, &gctxs); prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c new file mode 100644 index 0000000..3779c24 --- /dev/null +++ b/test/unit/prof_mdump.c @@ -0,0 +1,214 @@ +#include "test/jemalloc_test.h" + +static const char *test_filename = "test_filename"; +static bool did_prof_dump_open; + +static int +prof_dump_open_file_intercept(const char *filename, int mode) { + int fd; + + did_prof_dump_open = true; + + /* + * Stronger than a strcmp() - verifying that we internally directly use + * the caller supplied char pointer. + */ + expect_ptr_eq(filename, test_filename, + "Dump file name should be \"%s\"", test_filename); + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return fd; +} + +TEST_BEGIN(test_mdump_normal) { + test_skip_if(!config_prof); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + prof_dump_open_file = prof_dump_open_file_intercept; + did_prof_dump_open = false; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), 0, + "Unexpected mallctl failure while dumping"); + expect_true(did_prof_dump_open, "Expected a profile dump"); + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; +} +TEST_END + +static int +prof_dump_open_file_error(const char *filename, int mode) { + return -1; +} + +/* + * In the context of test_mdump_output_error, prof_dump_write_file_count is the + * total number of times prof_dump_write_file_error() is expected to be called. + * In the context of test_mdump_maps_error, prof_dump_write_file_count is the + * total number of times prof_dump_write_file_error() is expected to be called + * starting from the one that contains an 'M' (beginning the "MAPPED_LIBRARIES" + * header). + */ +static int prof_dump_write_file_count; + +static ssize_t +prof_dump_write_file_error(int fd, const void *s, size_t len) { + --prof_dump_write_file_count; + + expect_d_ge(prof_dump_write_file_count, 0, + "Write is called after error occurs"); + + if (prof_dump_write_file_count == 0) { + return -1; + } else { + /* + * Any non-negative number indicates success, and for + * simplicity we just use 0. 
When prof_dump_write_file_count + * is positive, it means that we haven't reached the write that + * we want to fail; when prof_dump_write_file_count is + * negative, it means that we've already violated the + * expect_d_ge(prof_dump_write_file_count, 0) statement above, + * but instead of aborting, we continue the rest of the test, + * and we indicate that all the writes after the failed write + * are successful. + */ + return 0; + } +} + +static void +expect_write_failure(int count) { + prof_dump_write_file_count = count; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), EFAULT, "Dump should err"); + expect_d_eq(prof_dump_write_file_count, 0, + "Dumping stopped after a wrong number of writes"); +} + +TEST_BEGIN(test_mdump_output_error) { + test_skip_if(!config_prof); + test_skip_if(!config_debug); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_write_file_t *write_file_orig = prof_dump_write_file; + + prof_dump_write_file = prof_dump_write_file_error; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + /* + * When opening the dump file fails, there shouldn't be any write, and + * mallctl() should return failure. + */ + prof_dump_open_file = prof_dump_open_file_error; + expect_write_failure(0); + + /* + * When the n-th write fails, there shouldn't be any more write, and + * mallctl() should return failure. + */ + prof_dump_open_file = prof_dump_open_file_intercept; + expect_write_failure(1); /* First write fails. */ + expect_write_failure(2); /* Second write fails. */ + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; + prof_dump_write_file = write_file_orig; +} +TEST_END + +static int +prof_dump_open_maps_error() { + return -1; +} + +static bool started_piping_maps_file; + +static ssize_t +prof_dump_write_maps_file_error(int fd, const void *s, size_t len) { + /* The main dump doesn't contain any capital 'M'. */ + if (!started_piping_maps_file && strchr(s, 'M') != NULL) { + started_piping_maps_file = true; + } + + if (started_piping_maps_file) { + return prof_dump_write_file_error(fd, s, len); + } else { + /* Return success when we haven't started piping maps. */ + return 0; + } +} + +static void +expect_maps_write_failure(int count) { + int mfd = prof_dump_open_maps(); + if (mfd == -1) { + /* No need to continue if we just can't find the maps file. */ + return; + } + close(mfd); + started_piping_maps_file = false; + expect_write_failure(count); + expect_true(started_piping_maps_file, "Should start piping maps"); +} + +TEST_BEGIN(test_mdump_maps_error) { + test_skip_if(!config_prof); + test_skip_if(!config_debug); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_write_file_t *write_file_orig = prof_dump_write_file; + prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; + + prof_dump_open_file = prof_dump_open_file_intercept; + prof_dump_write_file = prof_dump_write_maps_file_error; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + /* + * When opening the maps file fails, there shouldn't be any maps write, + * and mallctl() should return success. 
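	 *
	 * (This mirrors prof_dump_maps(), which treats the maps file as
	 * best-effort and simply returns when prof_dump_open_maps() yields
	 * -1, while a failed write marks the whole dump as failed:
	 *
	 *     maps open fails  ->  mallctl("prof.dump", ...) == 0       tolerated
	 *     any write fails  ->  mallctl("prof.dump", ...) == EFAULT  propagated
	 *
	 * The expectations below are written against that behavior.)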
+ */ + prof_dump_open_maps = prof_dump_open_maps_error; + started_piping_maps_file = false; + prof_dump_write_file_count = 0; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), 0, + "mallctl should not fail in case of maps file opening failure"); + expect_false(started_piping_maps_file, "Shouldn't start piping maps"); + expect_d_eq(prof_dump_write_file_count, 0, + "Dumping stopped after a wrong number of writes"); + + /* + * When the n-th maps write fails (given that we are able to find the + * maps file), there shouldn't be any more maps write, and mallctl() + * should return failure. + */ + prof_dump_open_maps = open_maps_orig; + expect_maps_write_failure(1); /* First write fails. */ + expect_maps_write_failure(2); /* Second write fails. */ + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; + prof_dump_write_file = write_file_orig; +} +TEST_END + +int +main(void) { + return test( + test_mdump_normal, + test_mdump_output_error, + test_mdump_maps_error); +} diff --git a/test/unit/prof_mdump.sh b/test/unit/prof_mdump.sh new file mode 100644 index 0000000..d14cb8c --- /dev/null +++ b/test/unit/prof_mdump.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 29fa02b..dc64a04 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -83,13 +83,10 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; -static bool -prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { +static void +prof_dump_header_intercept(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); - - return false; } TEST_BEGIN(test_prof_reset_cleanup) { -- cgit v0.12 From c8683bee80768c191b2e08f1fcef583bc17c9203 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 30 Mar 2020 16:48:45 -0700 Subject: Unify printing for prof counts object --- doc/jemalloc.xml.in | 6 +++--- src/prof_data.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5ab8456..5472294 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3437,7 +3437,7 @@ heap_v2/524288 [...] @ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...] t*: 13: 6688 [0: 0] - t3: 12: 6496 [0: ] + t3: 12: 6496 [0: 0] t99: 1: 192 [0: 0] [...] @@ -3448,9 +3448,9 @@ descriptions of the corresponding fields. / : : [: ] [...] - : : [: ] + : : [: ] [...] - : : [: ] + : : [: ] [...] @ [...] [...] : : [: ] diff --git a/src/prof_data.c b/src/prof_data.c index 210b153..3f8c991 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -574,6 +574,12 @@ prof_dump_printf(const char *format, ...) 
{ } static void +prof_dump_print_cnts(const prof_cnt_t *cnts) { + prof_dump_printf("%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", + cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); +} + +static void prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); @@ -649,11 +655,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_dump_printf( - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes); + prof_dump_printf(" t%"FMTu64": ", tctx->thr_uid); + prof_dump_print_cnts(&tctx->dump_cnts); + prof_dump_write("\n"); break; default: not_reached(); @@ -820,22 +824,21 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } - prof_dump_printf( - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : ""); + prof_dump_printf(" t%"FMTu64": ", tdata->thr_uid); + prof_dump_print_cnts(&tdata->cnt_summed); + if (tdata->thread_name != NULL) { + prof_dump_printf(" %s", tdata->thread_name); + } + prof_dump_write("\n"); return NULL; } static void prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { - prof_dump_printf("heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes); + prof_dump_printf("heap_v2/%"FMTu64"\n t*: ", + ((uint64_t)1U << lg_prof_sample)); + prof_dump_print_cnts(cnt_all); + prof_dump_write("\n"); malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); @@ -864,10 +867,9 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - prof_dump_printf( - "\n t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes); + prof_dump_write("\n t*: "); + prof_dump_print_cnts(&gctx->cnt_summed); + prof_dump_write("\n"); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, (void *)tsdn); -- cgit v0.12 From f43ac8543e8e6d38a0f0caf9afad22500118f75f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 09:42:11 -0700 Subject: Correct prof header macro namings --- include/jemalloc/internal/prof_data.h | 6 +++--- include/jemalloc/internal/prof_log.h | 6 +++--- include/jemalloc/internal/prof_recent.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 46a3510..09a4099 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_DATA_H +#define JEMALLOC_INTERNAL_PROF_DATA_H #include "jemalloc/internal/mutex.h" @@ -19,4 +19,4 @@ void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t 
*tctx); -#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index 928bf27..e833ced 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_LOG_H +#define JEMALLOC_INTERNAL_PROF_LOG_H #include "jemalloc/internal/mutex.h" @@ -16,4 +16,4 @@ bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); -#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_LOG_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index f88413d..4f376c7 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_RECENT_H +#define JEMALLOC_INTERNAL_PROF_RECENT_H extern malloc_mutex_t prof_recent_dump_mtx; @@ -15,4 +15,4 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); -#endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_RECENT_H */ -- cgit v0.12 From 8118056c034aae3b8d3d250bed36e95eae6676a3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:13:55 -0700 Subject: Expose prof_data testing internals only in prof tests --- include/jemalloc/internal/prof_data.h | 8 ++++++++ include/jemalloc/internal/prof_externs.h | 6 ------ test/unit/prof_accum.c | 2 ++ test/unit/prof_active.c | 2 ++ test/unit/prof_reset.c | 2 ++ test/unit/prof_tctx.c | 2 ++ 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 09a4099..c1dc3ec 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -19,4 +19,12 @@ void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); +/* Used in unit tests. */ +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); + #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 9a2b122..a1baaff 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -92,18 +92,12 @@ void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. 
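 *
 * (Aside on the idiom shared by every hook in this group: JET_MUTABLE is
 * expected to expand to nothing under JEMALLOC_JET and to const otherwise,
 * so only testing builds may overwrite these pointers.  A test splices in
 * its own implementation roughly like this, with "intercept" standing in
 * for whatever the test defines:
 *
 *     static int
 *     intercept(const char *filename, int mode) {
 *         return open("/dev/null", O_WRONLY);    discard the dump output
 *     }
 *     ...
 *     prof_dump_open_file = intercept;
 *
 * as the prof_accum, prof_gdump and prof_mdump tests in this series do via
 * prof_dump_open_file_intercept().)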
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; -size_t prof_tdata_count(void); -size_t prof_bt_count(void); typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 8fc5881..5b8085e 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + #define NTHREADS 4 #define NALLOCS_PER_THREAD 50 #define DUMP_INTERVAL 1 diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 41c0512..af29e7a 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { bool old; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index dc64a04..22bf796 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + static int prof_dump_open_file_intercept(const char *filename, int mode) { int fd; diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4bc597b..801e5f7 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + TEST_BEGIN(test_prof_realloc) { tsd_t *tsd; int flags; -- cgit v0.12 From 841af2b4269b425c28b32c032340ac572d4773ae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:39:40 -0700 Subject: Move thread name handling to prof_data module --- include/jemalloc/internal/prof_data.h | 2 ++ src/prof.c | 63 ----------------------------------- src/prof_data.c | 63 +++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index c1dc3ec..6c6c534 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,6 +10,8 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); +char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/src/prof.c b/src/prof.c index 5e29f40..1457746 100644 --- a/src/prof.c +++ b/src/prof.c @@ -133,69 +133,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; 
- } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - -static int -prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - unsigned i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; - } - - /* Validate input. */ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { - char c = thread_name[i]; - if (!isgraph(c) && !isblank(c)) { - return EFAULT; - } - } - - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); - if (s == NULL) { - return EAGAIN; - } - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; - } - return 0; -} - static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP diff --git a/src/prof_data.c b/src/prof_data.c index 3f8c991..d2ad374 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -473,6 +473,69 @@ prof_bt_count(void) { return bt_count; } +char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; + } + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; + } + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; + } + memcpy(ret, thread_name, size); + return ret; +} + +int +prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t *tdata; + unsigned i; + char *s; + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return EAGAIN; + } + + /* Validate input. */ + if (thread_name == NULL) { + return EFAULT; + } + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) { + return EFAULT; + } + } + + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + if (s == NULL) { + return EAGAIN; + } + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) { + tdata->thread_name = s; + } + return 0; +} + static void prof_dump_check_possible_error(bool err_cond, const char *format, ...) 
{ assert(!prof_dump_error); -- cgit v0.12 From adfd9d7b1d69a997a74193bf9d03951616f22ba6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:43:04 -0700 Subject: Change tsdn to tsd for thread name allocation --- include/jemalloc/internal/prof_data.h | 2 +- src/prof.c | 2 +- src/prof_data.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 6c6c534..26b8b28 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,7 +10,7 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); -char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); diff --git a/src/prof.c b/src/prof.c index 1457746..29eb3e6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -746,7 +746,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); diff --git a/src/prof_data.c b/src/prof_data.c index d2ad374..9563293 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -474,7 +474,7 @@ prof_bt_count(void) { } char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { +prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { char *ret; size_t size; @@ -487,8 +487,8 @@ prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { return ""; } - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); + ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { return NULL; } @@ -520,7 +520,7 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { } } - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + s = prof_thread_name_alloc(tsd, thread_name); if (s == NULL) { return EAGAIN; } -- cgit v0.12 From 03ae509f325e952a1447d8b933ee57f3d116434d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 09:02:55 -0700 Subject: Create prof_sys module for reading system thread name --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 2 -- include/jemalloc/internal/prof_sys.h | 10 ++++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/prof.c | 22 +----------------- src/prof_sys.c | 27 ++++++++++++++++++++++ test/unit/prof_sys_thread_name.c | 2 ++ 10 files changed, 49 insertions(+), 23 deletions(-) create mode 100644 include/jemalloc/internal/prof_sys.h create mode 100644 src/prof_sys.c diff --git a/Makefile.in b/Makefile.in index 87ddd33..7f07d96 100644 --- a/Makefile.in +++ b/Makefile.in @@ -133,6 +133,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_sys.c \ $(srcroot)src/rtree.c \ 
$(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a1baaff..135fb29 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -90,8 +90,6 @@ uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. */ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); -extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h new file mode 100644 index 0000000..cfa0059 --- /dev/null +++ b/include/jemalloc/internal/prof_sys.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_PROF_SYS_H +#define JEMALLOC_INTERNAL_PROF_SYS_H + +void prof_sys_thread_name_fetch(tsd_t *tsd); + +/* Used in unit tests. */ +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; + +#endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index bbe814b..00ea2be 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,6 +74,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 6f7027b..0bcb45a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -106,6 +106,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ae60133..446ea60 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -74,6 +74,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 6f7027b..0bcb45a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -106,6 +106,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 29eb3e6..ea63cfd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/thread_event.h" /* @@ -133,27 +134,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } -static int -prof_sys_thread_name_read_impl(char *buf, size_t limit) { -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP - return pthread_getname_np(pthread_self(), buf, limit); -#else - return ENOSYS; -#endif -} -prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = - prof_sys_thread_name_read_impl; - -static void -prof_sys_thread_name_fetch(tsd_t *tsd) { -#define THREAD_NAME_MAX_LEN 16 - char buf[THREAD_NAME_MAX_LEN]; - if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { - prof_thread_name_set_impl(tsd, buf); - } -#undef THREAD_NAME_MAX_LEN -} - void prof_malloc_sample_object(tsd_t *tsd, const 
void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { diff --git a/src/prof_sys.c b/src/prof_sys.c new file mode 100644 index 0000000..521a71a --- /dev/null +++ b/src/prof_sys.c @@ -0,0 +1,27 @@ +#define JEMALLOC_PROF_SYS_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" + +static int +prof_sys_thread_name_read_impl(char *buf, size_t limit) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + return pthread_getname_np(pthread_self(), buf, limit); +#else + return ENOSYS; +#endif +} +prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = + prof_sys_thread_name_read_impl; + +void +prof_sys_thread_name_fetch(tsd_t *tsd) { +#define THREAD_NAME_MAX_LEN 16 + char buf[THREAD_NAME_MAX_LEN]; + if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { + prof_thread_name_set_impl(tsd, buf); + } +#undef THREAD_NAME_MAX_LEN +} diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index ec1e774..affc788 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static const char *test_thread_name = "test_name"; static int -- cgit v0.12 From 767a2e1790656f038123036772fed6656175c7e6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 16:20:01 -0700 Subject: Move file handling logic in prof to prof_sys --- include/jemalloc/internal/prof_externs.h | 2 - include/jemalloc/internal/prof_sys.h | 10 ++ src/ctl.c | 1 + src/prof.c | 142 +-------------------------- src/prof_log.c | 1 + src/prof_sys.c | 158 +++++++++++++++++++++++++++++++ 6 files changed, 175 insertions(+), 139 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 135fb29..96e08c8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -58,11 +58,9 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); int prof_getpid(void); -void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index cfa0059..166df6f 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -1,10 +1,20 @@ #ifndef JEMALLOC_INTERNAL_PROF_SYS_H #define JEMALLOC_INTERNAL_PROF_SYS_H +extern malloc_mutex_t prof_dump_filename_mtx; +extern base_t *prof_base; + void prof_sys_thread_name_fetch(tsd_t *tsd); /* Used in unit tests. 
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; +void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); +bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); +void prof_fdump_impl(tsd_t *tsd); +void prof_idump_impl(tsd_t *tsd); +bool prof_mdump_impl(tsd_t *tsd, const char *filename); +void prof_gdump_impl(tsd_t *tsd); + #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/src/ctl.c b/src/ctl.c index 5cba9af..fe0b9f9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/src/prof.c b/src/prof.c index ea63cfd..7732ede 100644 --- a/src/prof.c +++ b/src/prof.c @@ -86,38 +86,13 @@ malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_filename_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; - -/* The fallback allocator profiling functionality will use. */ -base_t *prof_base; - malloc_mutex_t prof_dump_mtx; -static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ -/* - * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up - * calling strncpy with a size of 0, which triggers a -Wstringop-truncation - * warning (strncpy can never actually be called in this case, since we bail out - * much earlier when config_prof is false). This function works around the - * warning to let us leave the warning on. - */ -static inline void -prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { - cassert(config_prof); -#ifdef JEMALLOC_PROF - strncpy(dest, src, size); -#endif -} - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); @@ -507,57 +482,9 @@ prof_getpid(void) { #endif } -static const char * -prof_dump_prefix_get(tsdn_t* tsdn) { - malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); - - return prof_dump_prefix == NULL ? 
opt_prof_prefix : prof_dump_prefix; -} - -static bool -prof_dump_prefix_is_empty(tsdn_t *tsdn) { - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - return ret; -} - -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) -static void -prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { - cassert(config_prof); - - assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); - - if (vseq != VSEQ_INVALID) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", - prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); - } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", - prof_prefix, prof_getpid(), prof_dump_seq, v); - } - prof_dump_seq++; -} - -void -prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), - ind); - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); -} - static void prof_fdump(void) { tsd_t *tsd; - char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); assert(opt_prof_final); @@ -567,12 +494,8 @@ prof_fdump(void) { } tsd = tsd_fetch(); assert(tsd_reentrancy_level_get(tsd) == 0); - assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, opt_prof_leak); + prof_fdump_impl(tsd); } static bool @@ -582,31 +505,6 @@ prof_idump_accum_init(void) { return counter_accum_init(&prof_idump_accumulated, prof_interval); } -bool -prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { - cassert(config_prof); - ctl_mtx_assert_held(tsdn); - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix == NULL) { - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - /* Everything is still guarded by ctl_mtx. */ - char *buffer = base_alloc(tsdn, prof_base, - PROF_DUMP_FILENAME_LEN, QUANTUM); - if (buffer == NULL) { - return true; - } - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_prefix = buffer; - } - assert(prof_dump_prefix != NULL); - - prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); - prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - - return false; -} - void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; @@ -631,16 +529,7 @@ prof_idump(tsdn_t *tsdn) { return; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - return; - } - char filename[PATH_MAX + 1]; - prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + prof_idump_impl(tsd); } bool @@ -651,20 +540,8 @@ prof_mdump(tsd_t *tsd, const char *filename) { if (!opt_prof || !prof_booted) { return true; } - char filename_buf[DUMP_FILENAME_BUFSIZE]; - if (filename == NULL) { - /* No filename specified, so automatically generate one. 
*/ - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - return true; - } - prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); - prof_dump_mseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - filename = filename_buf; - } - return prof_dump(tsd, true, filename, false); + + return prof_mdump_impl(tsd, filename); } void @@ -691,16 +568,7 @@ prof_gdump(tsdn_t *tsdn) { return; } - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsdn)[0] == '\0') { - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - return; - } - char filename[DUMP_FILENAME_BUFSIZE]; - prof_dump_filename(tsd, filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + prof_gdump_impl(tsd); } static uint64_t diff --git a/src/prof_log.c b/src/prof_log.c index bda01d0..b32d6f6 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_sys.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/src/prof_sys.c b/src/prof_sys.c index 521a71a..47bc43b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -2,9 +2,22 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" +malloc_mutex_t prof_dump_filename_mtx; + +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; + +static char *prof_dump_prefix = NULL; + +/* The fallback allocator profiling functionality will use. */ +base_t *prof_base; + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP @@ -25,3 +38,148 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { } #undef THREAD_NAME_MAX_LEN } + +/* + * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up + * calling strncpy with a size of 0, which triggers a -Wstringop-truncation + * warning (strncpy can never actually be called in this case, since we bail out + * much earlier when config_prof is false). This function works around the + * warning to let us leave the warning on. + */ +static inline void +prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { + cassert(config_prof); +#ifdef JEMALLOC_PROF + strncpy(dest, src, size); +#endif +} + +static const char * +prof_dump_prefix_get(tsdn_t* tsdn) { + malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); + + return prof_dump_prefix == NULL ? 
opt_prof_prefix : prof_dump_prefix; +} + +static bool +prof_dump_prefix_is_empty(tsdn_t *tsdn) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return ret; +} + +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +static void +prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { + cassert(config_prof); + + assert(tsd_reentrancy_level_get(tsd) == 0); + const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + + if (vseq != VSEQ_INVALID) { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c%"FMTu64".heap", + prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c.heap", + prof_prefix, prof_getpid(), prof_dump_seq, v); + } + prof_dump_seq++; +} + +void +prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), + ind); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); +} + +void +prof_fdump_impl(tsd_t *tsd) { + char filename[DUMP_FILENAME_BUFSIZE]; + + assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, opt_prof_leak); +} + +bool +prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { + cassert(config_prof); + ctl_mtx_assert_held(tsdn); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix == NULL) { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + /* Everything is still guarded by ctl_mtx. */ + char *buffer = base_alloc(tsdn, prof_base, + PROF_DUMP_FILENAME_LEN, QUANTUM); + if (buffer == NULL) { + return true; + } + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + prof_dump_prefix = buffer; + } + assert(prof_dump_prefix != NULL); + + prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + + return false; +} + +void +prof_idump_impl(tsd_t *tsd) { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + return; + } + char filename[PATH_MAX + 1]; + prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); +} + +bool +prof_mdump_impl(tsd_t *tsd, const char *filename) { + char filename_buf[DUMP_FILENAME_BUFSIZE]; + if (filename == NULL) { + /* No filename specified, so automatically generate one. 
*/ + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + return true; + } + prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); + prof_dump_mseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + filename = filename_buf; + } + return prof_dump(tsd, true, filename, false); +} + +void +prof_gdump_impl(tsd_t *tsd) { + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsdn)[0] == '\0') { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return; + } + char filename[DUMP_FILENAME_BUFSIZE]; + prof_dump_filename(tsd, filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); +} -- cgit v0.12 From 4736fb4fc9c105320c71dad5425a535cebf390b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 16:39:41 -0700 Subject: Move file handling logic in prof_data to prof_sys --- include/jemalloc/internal/prof_data.h | 4 +- include/jemalloc/internal/prof_externs.h | 8 -- include/jemalloc/internal/prof_sys.h | 15 +- src/prof_data.c | 240 ++----------------------------- src/prof_sys.c | 207 ++++++++++++++++++++++++++ test/unit/prof_accum.c | 1 + test/unit/prof_gdump.c | 2 + test/unit/prof_idump.c | 2 + test/unit/prof_mdump.c | 2 + test/unit/prof_reset.c | 1 + 10 files changed, 239 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 26b8b28..5c3f129 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -12,8 +12,8 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck); +void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, + void (*write_cb)(const char *), bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 96e08c8..c7c3ccb 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -87,14 +87,6 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); -/* Used by unit tests. 
*/ -typedef int (prof_dump_open_file_t)(const char *, int); -extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; -typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); -extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(); -extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; - bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 166df6f..0d97cb9 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -5,11 +5,6 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; void prof_sys_thread_name_fetch(tsd_t *tsd); - -/* Used in unit tests. */ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); -extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; - void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); @@ -17,4 +12,14 @@ void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); +/* Used in unit tests. */ +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; +typedef int (prof_dump_open_file_t)(const char *, int); +extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; +typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; +typedef int (prof_dump_open_maps_t)(); +extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; + #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/src/prof_data.c b/src/prof_data.c index 9563293..8077229 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -55,27 +55,8 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -/* The following are needed for dumping and are protected by prof_dump_mtx. */ -/* - * Whether there has been an error in the dumping process, which could have - * happened either in file opening or in file writing. When an error has - * already occurred, we will stop further writing to the file. - */ -static bool prof_dump_error; -/* - * Whether error should be handled locally: if true, then we print out error - * message as well as abort (if opt_abort is true) when an error occurred, and - * we also report the error back to the caller in the end; if false, then we - * only report the error back to the caller in the end. - */ -static bool prof_dump_handle_error_locally; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +/* Dump write callback; stored global to simplify function interfaces. */ +static void (*prof_dump_write)(const char *); /******************************************************************************/ /* Red-black trees. */ @@ -536,94 +517,6 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return 0; } -static void -prof_dump_check_possible_error(bool err_cond, const char *format, ...) 
{ - assert(!prof_dump_error); - if (!err_cond) { - return; - } - - prof_dump_error = true; - if (!prof_dump_handle_error_locally) { - return; - } - - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - malloc_write(buf); - - if (opt_abort) { - abort(); - } -} - -static int -prof_dump_open_file_impl(const char *filename, int mode) { - return creat(filename, mode); -} -prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = - prof_dump_open_file_impl; - -static void -prof_dump_open(const char *filename) { - prof_dump_fd = prof_dump_open_file(filename, 0644); - prof_dump_check_possible_error(prof_dump_fd == -1, - ": failed to open \"%s\"\n", filename); -} - -prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; - -static void -prof_dump_flush() { - cassert(config_prof); - if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); - prof_dump_check_possible_error(err == -1, - ": failed to write during heap profile flush\n"); - } - prof_dump_buf_end = 0; -} - -static void -prof_dump_close() { - if (prof_dump_fd != -1) { - prof_dump_flush(); - close(prof_dump_fd); - } -} - -static void -prof_dump_write(const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - prof_dump_flush(); - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); -} - JEMALLOC_FORMAT_PRINTF(1, 2) static void prof_dump_printf(const char *format, ...) { @@ -938,82 +831,12 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, (void *)tsdn); } -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps_internal(const char *format, ...) { - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - -static int -prof_dump_open_maps_impl() { - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps_internal("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - int pid = prof_getpid(); - - mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps_internal("/proc/%d/maps", pid); - } -#endif - return mfd; -} -prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = - prof_dump_open_maps_impl; - -static void -prof_dump_maps() { - int mfd = prof_dump_open_maps(); - if (mfd == -1) { - return; - } - - prof_dump_write("\nMAPPED_LIBRARIES:\n"); - ssize_t nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). 
*/ - prof_dump_flush(); - } - nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); - } while (nread > 0); - - close(mfd); -} - /* * See prof_sample_threshold_update() comment for why the body of this function * is conditionally compiled. */ static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #ifdef JEMALLOC_PROF /* * Scaling is equivalent AdjustSamples() in jeprof, but the result may @@ -1036,8 +859,7 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); + ": Run jeprof on dump output for leak detail\n"); } #endif } @@ -1093,62 +915,24 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_leave(tsd, tdata); } -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, const prof_cnt_t *cnt_all, - prof_gctx_tree_t *gctxs) { - prof_dump_error = false; - prof_dump_handle_error_locally = !propagate_err; - - /* Create dump file. */ - prof_dump_open(filename); - /* Dump profile header. */ - prof_dump_header(tsd_tsdn(tsd), cnt_all); - /* Dump per gctx profile stats. */ - gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, (void *)tsd_tsdn(tsd)); - /* Dump /proc//maps if possible. */ - prof_dump_maps(); - /* Close dump file. */ - prof_dump_close(); - - return prof_dump_error; -} - -bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, +void +prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); + prof_dump_write = write_cb; prof_gctx_tree_t gctxs; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, - tdata, &prof_tdata_merge_iter_arg.cnt_all, &gctxs); + prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); + gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, + (void *)tsd_tsdn(tsd)); prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); + prof_gctx_merge_iter_arg.leak_ngctx); } - return false; } /* Used in unit tests. */ diff --git a/src/prof_sys.c b/src/prof_sys.c index 47bc43b..364c315 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -18,6 +18,28 @@ static char *prof_dump_prefix = NULL; /* The fallback allocator profiling functionality will use. */ base_t *prof_base; +/* The following are needed for dumping and are protected by prof_dump_mtx. */ +/* + * Whether there has been an error in the dumping process, which could have + * happened either in file opening or in file writing. When an error has + * already occurred, we will stop further writing to the file. 
+ */ +static bool prof_dump_error; +/* + * Whether error should be handled locally: if true, then we print out error + * message as well as abort (if opt_abort is true) when an error occurred, and + * we also report the error back to the caller in the end; if false, then we + * only report the error back to the caller in the end. + */ +static bool prof_dump_handle_error_locally; +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP @@ -39,6 +61,191 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { #undef THREAD_NAME_MAX_LEN } +static void +prof_dump_check_possible_error(bool err_cond, const char *format, ...) { + assert(!prof_dump_error); + if (!err_cond) { + return; + } + + prof_dump_error = true; + if (!prof_dump_handle_error_locally) { + return; + } + + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + malloc_write(buf); + + if (opt_abort) { + abort(); + } +} + +static int +prof_dump_open_file_impl(const char *filename, int mode) { + return creat(filename, mode); +} +prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = + prof_dump_open_file_impl; + +static void +prof_dump_open(const char *filename) { + prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(prof_dump_fd == -1, + ": failed to open \"%s\"\n", filename); +} + +prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; + +static void +prof_dump_flush() { + cassert(config_prof); + if (!prof_dump_error) { + ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); + prof_dump_check_possible_error(err == -1, + ": failed to write during heap profile flush\n"); + } + prof_dump_buf_end = 0; +} + +static void +prof_dump_write(const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + prof_dump_flush(); + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + assert(i == slen); +} + +static void +prof_dump_close() { + if (prof_dump_fd != -1) { + prof_dump_flush(); + close(prof_dump_fd); + } +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps_internal(const char *format, ...) 
{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + +static int +prof_dump_open_maps_impl() { + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps_internal("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + int pid = prof_getpid(); + + mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps_internal("/proc/%d/maps", pid); + } +#endif + return mfd; +} +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = + prof_dump_open_maps_impl; + +static void +prof_dump_maps() { + int mfd = prof_dump_open_maps(); + if (mfd == -1) { + return; + } + + prof_dump_write("\nMAPPED_LIBRARIES:\n"); + ssize_t nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + prof_dump_flush(); + } + nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); + + close(mfd); +} + +static bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + prof_dump_error = false; + prof_dump_handle_error_locally = !propagate_err; + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_dump_open(filename); + prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); + prof_dump_maps(); + prof_dump_close(); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + return prof_dump_error; +} + /* * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up * calling strncpy with a size of 0, which triggers a -Wstringop-truncation diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 5b8085e..ef392ac 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" #define NTHREADS 4 #define NALLOCS_PER_THREAD 50 diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 6209255..9a47a19 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static bool did_prof_dump_open; static int diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index b0c1bc2..607944c 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + #define TEST_PREFIX "test_prefix" static bool did_prof_dump_open; diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index 3779c24..75b3a51 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static const char *test_filename = "test_filename"; static bool did_prof_dump_open; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 22bf796..2bdc37c 100644 --- a/test/unit/prof_reset.c +++ 
b/test/unit/prof_reset.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" static int prof_dump_open_file_intercept(const char *filename, int mode) { -- cgit v0.12 From d128efcb6aeddec8d3f1220eda0251dcaa25bab8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 10:26:03 -0700 Subject: Relocate a few prof utilities to the right modules --- include/jemalloc/internal/prof_data.h | 5 +++++ include/jemalloc/internal/prof_externs.h | 14 -------------- include/jemalloc/internal/prof_log.h | 3 +++ include/jemalloc/internal/prof_recent.h | 5 +++++ include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 3 +++ src/prof.c | 16 ---------------- src/prof_data.c | 4 ++++ src/prof_sys.c | 9 +++++++++ 9 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 5c3f129..de9f7ba 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -3,6 +3,10 @@ #include "jemalloc/internal/mutex.h" +extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; + extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; @@ -19,6 +23,7 @@ prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index c7c3ccb..a4a4aa6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -3,10 +3,6 @@ #include "jemalloc/internal/mutex.h" -extern malloc_mutex_t bt2gctx_mtx; -extern malloc_mutex_t tdatas_mtx; -extern malloc_mutex_t prof_dump_mtx; - extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; @@ -26,7 +22,6 @@ extern char opt_prof_prefix[ /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; -extern malloc_mutex_t prof_recent_alloc_mtx; /* Whether to use thread name provided by the system or by mallctl. 
*/ extern bool opt_prof_sys_thread_name; @@ -57,12 +52,10 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -int prof_getpid(void); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -87,11 +80,4 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); -bool prof_log_start(tsdn_t *tsdn, const char *filename); -bool prof_log_stop(tsdn_t *tsdn); - -ssize_t prof_recent_alloc_max_ctl_read(); -ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); -void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); - #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index e833ced..ccb557d 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -16,4 +16,7 @@ bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); +bool prof_log_start(tsdn_t *tsdn, const char *filename); +bool prof_log_stop(tsdn_t *tsdn); + #endif /* JEMALLOC_INTERNAL_PROF_LOG_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index 4f376c7..d793c6d 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_H #define JEMALLOC_INTERNAL_PROF_RECENT_H +extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); @@ -15,4 +16,8 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); +ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); +void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); + #endif /* JEMALLOC_INTERNAL_PROF_RECENT_H */ diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0d97cb9..3896f29 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -5,6 +5,7 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; void prof_sys_thread_name_fetch(tsd_t *tsd); +int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); diff --git a/src/ctl.c b/src/ctl.c index fe0b9f9..8b9f42e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,9 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/src/prof.c b/src/prof.c index 
7732ede..50c08fa 100644 --- a/src/prof.c +++ b/src/prof.c @@ -78,16 +78,9 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -/* Non static to enable profiling. */ -malloc_mutex_t bt2gctx_mtx; - -malloc_mutex_t tdatas_mtx; - static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -malloc_mutex_t prof_dump_mtx; - /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; @@ -473,15 +466,6 @@ prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { } } -int -prof_getpid(void) { -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} - static void prof_fdump(void) { tsd_t *tsd; diff --git a/src/prof_data.c b/src/prof_data.c index 8077229..6e84e3c 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -25,6 +25,10 @@ /******************************************************************************/ +malloc_mutex_t bt2gctx_mtx; +malloc_mutex_t tdatas_mtx; +malloc_mutex_t prof_dump_mtx; + /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. diff --git a/src/prof_sys.c b/src/prof_sys.c index 364c315..cdec926 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -61,6 +61,15 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { #undef THREAD_NAME_MAX_LEN } +int +prof_getpid(void) { +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + static void prof_dump_check_possible_error(bool err_cond, const char *format, ...) { assert(!prof_dump_error); -- cgit v0.12 From dad821bb2239a42517f6ba5e48a29f5f569ab38f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 11:19:51 -0700 Subject: Move unwind to prof_sys --- include/jemalloc/internal/prof_data.h | 3 +- include/jemalloc/internal/prof_sys.h | 3 + src/prof.c | 269 ++-------------------------------- src/prof_data.c | 19 +-- src/prof_sys.c | 266 +++++++++++++++++++++++++++++++++ 5 files changed, 284 insertions(+), 276 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index de9f7ba..9c2d697 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -14,6 +14,7 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, @@ -21,8 +22,6 @@ void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3896f29..d784ef9 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -4,6 +4,9 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); void 
prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); diff --git a/src/prof.c b/src/prof.c index 50c08fa..2573541 100644 --- a/src/prof.c +++ b/src/prof.c @@ -19,23 +19,6 @@ /******************************************************************************/ -#ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY -#include -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -/* - * We have a circular dependency -- jemalloc_internal.h tells us if we should - * use libgcc's unwinding functionality, but after we've included that, we've - * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. - */ -#undef _Unwind_Backtrace -#include -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) -#endif - -/******************************************************************************/ /* Data. */ bool opt_prof = false; @@ -147,242 +130,21 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_tctx_try_destroy(tsd, tctx); } -void -bt_init(prof_bt_t *bt, void **vec) { - cassert(config_prof); - - bt->vec = vec; - bt->len = 0; -} - -#ifdef JEMALLOC_PROF_LIBUNWIND -static void -prof_backtrace_impl(prof_bt_t *bt) { - int nframes; - - cassert(config_prof); - assert(bt->len == 0); - assert(bt->vec != NULL); - - nframes = unw_backtrace(bt->vec, PROF_BT_MAX); - if (nframes <= 0) { - return; - } - bt->len = nframes; -} -#elif (defined(JEMALLOC_PROF_LIBGCC)) -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { - cassert(config_prof); - - return _URC_NO_REASON; -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) { - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - void *ip; - - cassert(config_prof); - - ip = (void *)_Unwind_GetIP(context); - if (ip == NULL) { - return _URC_END_OF_STACK; +prof_tctx_t * +prof_tctx_create(tsd_t *tsd) { + if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { + return NULL; } - data->bt->vec[data->bt->len] = ip; - data->bt->len++; - if (data->bt->len == data->max) { - return _URC_END_OF_STACK; - } - - return _URC_NO_REASON; -} - -static void -prof_backtrace_impl(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, PROF_BT_MAX}; - - cassert(config_prof); - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#elif (defined(JEMALLOC_PROF_GCC)) -static void -prof_backtrace_impl(prof_bt_t *bt) { -#define BT_FRAME(i) \ - if ((i) < PROF_BT_MAX) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) { \ - return; \ - } \ - p = __builtin_return_address(i); \ - if (p == NULL) { \ - return; \ - } \ - bt->vec[(i)] = p; \ - bt->len = (i) + 1; \ - } else { \ - return; \ + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return NULL; } - cassert(config_prof); - - BT_FRAME(0) - BT_FRAME(1) - BT_FRAME(2) - BT_FRAME(3) - BT_FRAME(4) - BT_FRAME(5) - BT_FRAME(6) - BT_FRAME(7) - BT_FRAME(8) - BT_FRAME(9) - - BT_FRAME(10) - BT_FRAME(11) - BT_FRAME(12) - BT_FRAME(13) - BT_FRAME(14) - BT_FRAME(15) - BT_FRAME(16) - BT_FRAME(17) - BT_FRAME(18) - BT_FRAME(19) - - BT_FRAME(20) - BT_FRAME(21) - BT_FRAME(22) - BT_FRAME(23) - BT_FRAME(24) - BT_FRAME(25) - BT_FRAME(26) - BT_FRAME(27) - BT_FRAME(28) - BT_FRAME(29) - - BT_FRAME(30) - BT_FRAME(31) - BT_FRAME(32) - BT_FRAME(33) - BT_FRAME(34) - BT_FRAME(35) - BT_FRAME(36) - BT_FRAME(37) - BT_FRAME(38) - BT_FRAME(39) - - BT_FRAME(40) - BT_FRAME(41) - BT_FRAME(42) - BT_FRAME(43) - BT_FRAME(44) - BT_FRAME(45) - BT_FRAME(46) - BT_FRAME(47) - BT_FRAME(48) - BT_FRAME(49) - - 
BT_FRAME(50) - BT_FRAME(51) - BT_FRAME(52) - BT_FRAME(53) - BT_FRAME(54) - BT_FRAME(55) - BT_FRAME(56) - BT_FRAME(57) - BT_FRAME(58) - BT_FRAME(59) - - BT_FRAME(60) - BT_FRAME(61) - BT_FRAME(62) - BT_FRAME(63) - BT_FRAME(64) - BT_FRAME(65) - BT_FRAME(66) - BT_FRAME(67) - BT_FRAME(68) - BT_FRAME(69) - - BT_FRAME(70) - BT_FRAME(71) - BT_FRAME(72) - BT_FRAME(73) - BT_FRAME(74) - BT_FRAME(75) - BT_FRAME(76) - BT_FRAME(77) - BT_FRAME(78) - BT_FRAME(79) - - BT_FRAME(80) - BT_FRAME(81) - BT_FRAME(82) - BT_FRAME(83) - BT_FRAME(84) - BT_FRAME(85) - BT_FRAME(86) - BT_FRAME(87) - BT_FRAME(88) - BT_FRAME(89) - - BT_FRAME(90) - BT_FRAME(91) - BT_FRAME(92) - BT_FRAME(93) - BT_FRAME(94) - BT_FRAME(95) - BT_FRAME(96) - BT_FRAME(97) - BT_FRAME(98) - BT_FRAME(99) - - BT_FRAME(100) - BT_FRAME(101) - BT_FRAME(102) - BT_FRAME(103) - BT_FRAME(104) - BT_FRAME(105) - BT_FRAME(106) - BT_FRAME(107) - BT_FRAME(108) - BT_FRAME(109) - - BT_FRAME(110) - BT_FRAME(111) - BT_FRAME(112) - BT_FRAME(113) - BT_FRAME(114) - BT_FRAME(115) - BT_FRAME(116) - BT_FRAME(117) - BT_FRAME(118) - BT_FRAME(119) - - BT_FRAME(120) - BT_FRAME(121) - BT_FRAME(122) - BT_FRAME(123) - BT_FRAME(124) - BT_FRAME(125) - BT_FRAME(126) - BT_FRAME(127) -#undef BT_FRAME -} -#else -static void -prof_backtrace_impl(prof_bt_t *bt) { - cassert(config_prof); - not_reached(); -} -#endif - -void -prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { - cassert(config_prof); - pre_reentrancy(tsd, NULL); - prof_backtrace_impl(bt); - post_reentrancy(tsd); + prof_bt_t bt; + bt_init(&bt, tdata->vec); + prof_backtrace(tsd, &bt); + return prof_lookup(tsd, &bt); } /* @@ -852,13 +614,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } } -#ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal - * state before enabling profiling. - */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); -#endif + + prof_unwind_init(); } prof_booted = true; diff --git a/src/prof_data.c b/src/prof_data.c index 6e84e3c..e38cb80 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -322,7 +322,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return false; } -static prof_tctx_t * +prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; @@ -395,23 +395,6 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { return ret.p; } -prof_tctx_t * -prof_tctx_create(tsd_t *tsd) { - if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { - return NULL; - } - - prof_tdata_t *tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return NULL; - } - - prof_bt_t bt; - bt_init(&bt, tdata->vec); - prof_backtrace(tsd, &bt); - return prof_lookup(tsd, &bt); -} - /* Used in unit tests. */ static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, diff --git a/src/prof_sys.c b/src/prof_sys.c index cdec926..027da89 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -6,6 +6,24 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" +#ifdef JEMALLOC_PROF_LIBUNWIND +#define UNW_LOCAL_ONLY +#include +#endif + +#ifdef JEMALLOC_PROF_LIBGCC +/* + * We have a circular dependency -- jemalloc_internal.h tells us if we should + * use libgcc's unwinding functionality, but after we've included that, we've + * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 
+ */ +#undef _Unwind_Backtrace +#include +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#endif + +/******************************************************************************/ + malloc_mutex_t prof_dump_filename_mtx; static uint64_t prof_dump_seq; @@ -40,6 +58,254 @@ static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static size_t prof_dump_buf_end; static int prof_dump_fd; +void +bt_init(prof_bt_t *bt, void **vec) { + cassert(config_prof); + + bt->vec = vec; + bt->len = 0; +} + +#ifdef JEMALLOC_PROF_LIBUNWIND +static void +prof_backtrace_impl(prof_bt_t *bt) { + int nframes; + + cassert(config_prof); + assert(bt->len == 0); + assert(bt->vec != NULL); + + nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + if (nframes <= 0) { + return; + } + bt->len = nframes; +} +#elif (defined(JEMALLOC_PROF_LIBGCC)) +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { + cassert(config_prof); + + return _URC_NO_REASON; +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) { + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + void *ip; + + cassert(config_prof); + + ip = (void *)_Unwind_GetIP(context); + if (ip == NULL) { + return _URC_END_OF_STACK; + } + data->bt->vec[data->bt->len] = ip; + data->bt->len++; + if (data->bt->len == data->max) { + return _URC_END_OF_STACK; + } + + return _URC_NO_REASON; +} + +static void +prof_backtrace_impl(prof_bt_t *bt) { + prof_unwind_data_t data = {bt, PROF_BT_MAX}; + + cassert(config_prof); + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif (defined(JEMALLOC_PROF_GCC)) +static void +prof_backtrace_impl(prof_bt_t *bt) { +#define BT_FRAME(i) \ + if ((i) < PROF_BT_MAX) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) { \ + return; \ + } \ + p = __builtin_return_address(i); \ + if (p == NULL) { \ + return; \ + } \ + bt->vec[(i)] = p; \ + bt->len = (i) + 1; \ + } else { \ + return; \ + } + + cassert(config_prof); + + BT_FRAME(0) + BT_FRAME(1) + BT_FRAME(2) + BT_FRAME(3) + BT_FRAME(4) + BT_FRAME(5) + BT_FRAME(6) + BT_FRAME(7) + BT_FRAME(8) + BT_FRAME(9) + + BT_FRAME(10) + BT_FRAME(11) + BT_FRAME(12) + BT_FRAME(13) + BT_FRAME(14) + BT_FRAME(15) + BT_FRAME(16) + BT_FRAME(17) + BT_FRAME(18) + BT_FRAME(19) + + BT_FRAME(20) + BT_FRAME(21) + BT_FRAME(22) + BT_FRAME(23) + BT_FRAME(24) + BT_FRAME(25) + BT_FRAME(26) + BT_FRAME(27) + BT_FRAME(28) + BT_FRAME(29) + + BT_FRAME(30) + BT_FRAME(31) + BT_FRAME(32) + BT_FRAME(33) + BT_FRAME(34) + BT_FRAME(35) + BT_FRAME(36) + BT_FRAME(37) + BT_FRAME(38) + BT_FRAME(39) + + BT_FRAME(40) + BT_FRAME(41) + BT_FRAME(42) + BT_FRAME(43) + BT_FRAME(44) + BT_FRAME(45) + BT_FRAME(46) + BT_FRAME(47) + BT_FRAME(48) + BT_FRAME(49) + + BT_FRAME(50) + BT_FRAME(51) + BT_FRAME(52) + BT_FRAME(53) + BT_FRAME(54) + BT_FRAME(55) + BT_FRAME(56) + BT_FRAME(57) + BT_FRAME(58) + BT_FRAME(59) + + BT_FRAME(60) + BT_FRAME(61) + BT_FRAME(62) + BT_FRAME(63) + BT_FRAME(64) + BT_FRAME(65) + BT_FRAME(66) + BT_FRAME(67) + BT_FRAME(68) + BT_FRAME(69) + + BT_FRAME(70) + BT_FRAME(71) + BT_FRAME(72) + BT_FRAME(73) + BT_FRAME(74) + BT_FRAME(75) + BT_FRAME(76) + BT_FRAME(77) + BT_FRAME(78) + BT_FRAME(79) + + BT_FRAME(80) + BT_FRAME(81) + BT_FRAME(82) + BT_FRAME(83) + BT_FRAME(84) + BT_FRAME(85) + BT_FRAME(86) + BT_FRAME(87) + BT_FRAME(88) + BT_FRAME(89) + + BT_FRAME(90) + BT_FRAME(91) + BT_FRAME(92) + BT_FRAME(93) + BT_FRAME(94) + BT_FRAME(95) + BT_FRAME(96) + BT_FRAME(97) + BT_FRAME(98) + BT_FRAME(99) + + BT_FRAME(100) + BT_FRAME(101) + 
BT_FRAME(102) + BT_FRAME(103) + BT_FRAME(104) + BT_FRAME(105) + BT_FRAME(106) + BT_FRAME(107) + BT_FRAME(108) + BT_FRAME(109) + + BT_FRAME(110) + BT_FRAME(111) + BT_FRAME(112) + BT_FRAME(113) + BT_FRAME(114) + BT_FRAME(115) + BT_FRAME(116) + BT_FRAME(117) + BT_FRAME(118) + BT_FRAME(119) + + BT_FRAME(120) + BT_FRAME(121) + BT_FRAME(122) + BT_FRAME(123) + BT_FRAME(124) + BT_FRAME(125) + BT_FRAME(126) + BT_FRAME(127) +#undef BT_FRAME +} +#else +static void +prof_backtrace_impl(prof_bt_t *bt) { + cassert(config_prof); + not_reached(); +} +#endif + +void +prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { + cassert(config_prof); + pre_reentrancy(tsd, NULL); + prof_backtrace_impl(bt); + post_reentrancy(tsd); +} + +void prof_unwind_init() { +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal + * state before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif +} + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP -- cgit v0.12 From 1c6742e6a04376928ce1d6755666ba6141f038d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 14:19:54 -0700 Subject: Migrate prof dumping to use buffered writer --- src/prof_sys.c | 59 ++++++++++++++++++---------------------------------------- 1 file changed, 18 insertions(+), 41 deletions(-) diff --git a/src/prof_sys.c b/src/prof_sys.c index 027da89..f353802 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" @@ -55,7 +56,7 @@ static bool prof_dump_handle_error_locally; * all profile dumps. */ static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static size_t prof_dump_buf_end; +static buf_writer_t prof_dump_buf_writer; static int prof_dump_fd; void @@ -377,49 +378,24 @@ prof_dump_open(const char *filename) { prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; static void -prof_dump_flush() { +prof_dump_flush(void *cbopaque, const char *s) { cassert(config_prof); + assert(cbopaque == NULL); if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); + ssize_t err = prof_dump_write_file(prof_dump_fd, s, strlen(s)); prof_dump_check_possible_error(err == -1, ": failed to write during heap profile flush\n"); } - prof_dump_buf_end = 0; } static void prof_dump_write(const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - prof_dump_flush(); - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. 
*/ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); + buf_writer_cb(&prof_dump_buf_writer, s); } static void prof_dump_close() { if (prof_dump_fd != -1) { - prof_dump_flush(); close(prof_dump_fd); } } @@ -471,6 +447,13 @@ prof_dump_open_maps_impl() { prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = prof_dump_open_maps_impl; +static ssize_t +prof_dump_read_maps_cb(void *read_cbopaque, void *buf, size_t limit) { + int mfd = *(int *)read_cbopaque; + assert(mfd != -1); + return malloc_read_fd(mfd, buf, limit); +} + static void prof_dump_maps() { int mfd = prof_dump_open_maps(); @@ -479,17 +462,7 @@ prof_dump_maps() { } prof_dump_write("\nMAPPED_LIBRARIES:\n"); - ssize_t nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - prof_dump_flush(); - } - nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); - } while (nread > 0); - + buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -511,8 +484,12 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_dump_open(filename); + bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, + prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); + assert(!err); prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); prof_dump_maps(); + buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); prof_dump_close(); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); -- cgit v0.12 From 4556d3c0c8ad4c00fd3c31762653e68fb2a701e0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:14:53 -0700 Subject: Define structures for prof dump parameters --- src/prof_data.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index e38cb80..bd1ccf6 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -653,6 +653,7 @@ prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { malloc_mutex_unlock(tsdn, gctx->lock); } +typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t; struct prof_gctx_merge_iter_arg_s { tsdn_t *tsdn; size_t leak_ngctx; @@ -660,8 +661,7 @@ struct prof_gctx_merge_iter_arg_s { static prof_gctx_t * prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; + prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, @@ -720,6 +720,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { } } +typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { tsdn_t *tsdn; prof_cnt_t cnt_all; @@ -728,8 +729,8 @@ struct prof_tdata_merge_iter_arg_s { static prof_tdata_t * prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; + prof_tdata_merge_iter_arg_t *arg = + (prof_tdata_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, tdata->lock); if (!tdata->expired) { @@ -862,8 +863,8 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { static void 
prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_tdata_merge_iter_arg_t *prof_tdata_merge_iter_arg, + prof_gctx_merge_iter_arg_t *prof_gctx_merge_iter_arg, prof_gctx_tree_t *gctxs) { size_t tabind; union { @@ -908,8 +909,8 @@ prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); prof_dump_write = write_cb; prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); @@ -928,8 +929,8 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { tsd_t *tsd; prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_gctx_tree_t gctxs; tsd = tsd_fetch(); -- cgit v0.12 From 1f5fe3a3e38deaa75d32589a364163060e0ab3b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:09:08 -0700 Subject: Pass write callback explicitly in prof_data --- include/jemalloc/internal/prof_data.h | 7 +- src/prof_data.c | 120 +++++++++++++++++++++------------- src/prof_sys.c | 10 +-- test/unit/prof_reset.c | 3 +- 4 files changed, 85 insertions(+), 55 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 9c2d697..a0448d0 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -17,8 +17,8 @@ bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, - void (*write_cb)(const char *), bool leakcheck); +void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, + prof_tdata_t *tdata, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); @@ -28,7 +28,8 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); +typedef void (prof_dump_header_t)(tsdn_t *, write_cb_t *, void *, + const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index bd1ccf6..bc38915 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,9 +59,6 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -/* Dump write callback; stored global to simplify function interfaces. */ -static void (*prof_dump_write)(const char *); - /******************************************************************************/ /* Red-black trees. 
*/ @@ -504,21 +501,24 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return 0; } -JEMALLOC_FORMAT_PRINTF(1, 2) +JEMALLOC_FORMAT_PRINTF(3, 4) static void -prof_dump_printf(const char *format, ...) { +prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, + const char *format, ...) { va_list ap; char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - prof_dump_write(buf); + prof_dump_write(cbopaque, buf); } static void -prof_dump_print_cnts(const prof_cnt_t *cnts) { - prof_dump_printf("%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", +prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, + const prof_cnt_t *cnts) { + prof_dump_printf(prof_dump_write, cbopaque, + "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); } @@ -586,10 +586,17 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } +typedef struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg_t; +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + prof_tctx_dump_iter_arg_t *arg = (prof_tctx_dump_iter_arg_t *)opaque; + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -598,9 +605,11 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_dump_printf(" t%"FMTu64": ", tctx->thr_uid); - prof_dump_print_cnts(&tctx->dump_cnts); - prof_dump_write("\n"); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " t%"FMTu64": ", tctx->thr_uid); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &tctx->dump_cnts); + arg->prof_dump_write(arg->cbopaque, "\n"); break; default: not_reached(); @@ -761,38 +770,50 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } +typedef struct prof_tdata_dump_iter_arg_s prof_tdata_dump_iter_arg_t; +struct prof_tdata_dump_iter_arg_s { + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *unused) { + void *opaque) { if (!tdata->dumping) { return NULL; } - prof_dump_printf(" t%"FMTu64": ", tdata->thr_uid); - prof_dump_print_cnts(&tdata->cnt_summed); + prof_tdata_dump_iter_arg_t *arg = (prof_tdata_dump_iter_arg_t *)opaque; + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", + tdata->thr_uid); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &tdata->cnt_summed); if (tdata->thread_name != NULL) { - prof_dump_printf(" %s", tdata->thread_name); + arg->prof_dump_write(arg->cbopaque, " "); + arg->prof_dump_write(arg->cbopaque, tdata->thread_name); } - prof_dump_write("\n"); + arg->prof_dump_write(arg->cbopaque, "\n"); return NULL; } static void -prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { - prof_dump_printf("heap_v2/%"FMTu64"\n t*: ", - ((uint64_t)1U << lg_prof_sample)); - prof_dump_print_cnts(cnt_all); - prof_dump_write("\n"); - +prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, + void *cbopaque, const prof_cnt_t *cnt_all) { + 
prof_dump_printf(prof_dump_write, cbopaque, + "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); + prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); + prof_dump_write(cbopaque, "\n"); + + prof_tdata_dump_iter_arg_t arg = {prof_dump_write, cbopaque}; malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); malloc_mutex_unlock(tsdn, &tdatas_mtx); } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void -prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, - prof_gctx_tree_t *gctxs) { +prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, + prof_gctx_t *gctx, const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { cassert(config_prof); malloc_mutex_assert_owner(tsdn, gctx->lock); @@ -806,17 +827,18 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, return; } - prof_dump_write("@"); + prof_dump_write(cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { - prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); + prof_dump_printf(prof_dump_write, cbopaque, " %#"FMTxPTR, + (uintptr_t)bt->vec[i]); } - prof_dump_write("\n t*: "); - prof_dump_print_cnts(&gctx->cnt_summed); - prof_dump_write("\n"); + prof_dump_write(cbopaque, "\n t*: "); + prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); + prof_dump_write(cbopaque, "\n"); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)tsdn); + prof_tctx_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); } /* @@ -852,12 +874,20 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #endif } +typedef struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg_t; +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - tsdn_t *tsdn = (tsdn_t *)opaque; - malloc_mutex_lock(tsdn, gctx->lock); - prof_dump_gctx(tsdn, gctx, &gctx->bt, gctxs); - malloc_mutex_unlock(tsdn, gctx->lock); + prof_gctx_dump_iter_arg_t *arg = (prof_gctx_dump_iter_arg_t *)opaque; + malloc_mutex_lock(arg->tsdn, gctx->lock); + prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, + &gctx->bt, gctxs); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return NULL; } @@ -904,18 +934,20 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, } void -prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), - bool leakcheck) { +prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, + prof_tdata_t *tdata, bool leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); - prof_dump_write = write_cb; prof_gctx_tree_t gctxs; prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); + prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, + &prof_tdata_merge_iter_arg.cnt_all); + prof_gctx_dump_iter_arg_t prof_gctx_dump_iter_arg = {tsd_tsdn(tsd), + prof_dump_write, cbopaque}; gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)tsd_tsdn(tsd)); + &prof_gctx_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, 
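(Editorial aside, not part of the patch: a minimal standalone sketch of the explicit write-callback-plus-opaque-pointer pattern that this commit threads through prof_data. All sketch_* names below are hypothetical; only the (void *cbopaque, const char *s) callback shape is taken from the diff.)

#include <stddef.h>
#include <string.h>

/* Callback shape taken from the diff: (opaque state, string to emit). */
typedef void (sketch_write_cb_t)(void *cbopaque, const char *s);

/* Hypothetical writer state, reached only through the opaque pointer. */
typedef struct sketch_writer_s {
	char *buf;
	size_t len;
	size_t cap;
} sketch_writer_t;

static void
sketch_write_cb(void *cbopaque, const char *s) {
	sketch_writer_t *w = (sketch_writer_t *)cbopaque;
	size_t n = strlen(s);
	if (n > w->cap - w->len) {
		n = w->cap - w->len;	/* Truncate rather than overflow. */
	}
	memcpy(w->buf + w->len, s, n);
	w->len += n;
}

/* A dumper in the style of the refactored prof_dump_impl(): no global writer. */
static void
sketch_dump(sketch_write_cb_t *write_cb, void *cbopaque) {
	write_cb(cbopaque, "heap profile header\n");
	write_cb(cbopaque, "heap profile body\n");
}

A caller would invoke sketch_dump(sketch_write_cb, &writer) with a sketch_writer_t pointing at a caller-owned buffer; passing the pair explicitly, rather than stashing a file-scope function pointer, is what removing the static prof_dump_write variable above accomplishes.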
diff --git a/src/prof_sys.c b/src/prof_sys.c index f353802..5895ec4 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -389,11 +389,6 @@ prof_dump_flush(void *cbopaque, const char *s) { } static void -prof_dump_write(const char *s) { - buf_writer_cb(&prof_dump_buf_writer, s); -} - -static void prof_dump_close() { if (prof_dump_fd != -1) { close(prof_dump_fd); @@ -461,7 +456,7 @@ prof_dump_maps() { return; } - prof_dump_write("\nMAPPED_LIBRARIES:\n"); + buf_writer_cb(&prof_dump_buf_writer, "\nMAPPED_LIBRARIES:\n"); buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -487,7 +482,8 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); assert(!err); - prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); + prof_dump_impl(tsd, buf_writer_cb, &prof_dump_buf_writer, tdata, + leakcheck); prof_dump_maps(); buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); prof_dump_close(); diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 2bdc37c..5916bd1 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -87,7 +87,8 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static void -prof_dump_header_intercept(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { +prof_dump_header_intercept(tsdn_t *tsdn, write_cb_t *cb, void *cbopaque, + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); } -- cgit v0.12 From 5d823f3a910c7d737500b61ff8a00f6b634bc08b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:37:19 -0700 Subject: Consolidate struct definitions for prof dump parameters --- src/prof_data.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index bc38915..8cf7228 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -586,8 +586,8 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } -typedef struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg_t; -struct prof_tctx_dump_iter_arg_s { +typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t; +struct prof_dump_iter_arg_s { tsdn_t *tsdn; write_cb_t *prof_dump_write; void *cbopaque; @@ -595,7 +595,7 @@ struct prof_tctx_dump_iter_arg_s { static prof_tctx_t * prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - prof_tctx_dump_iter_arg_t *arg = (prof_tctx_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { @@ -770,12 +770,6 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } -typedef struct prof_tdata_dump_iter_arg_s prof_tdata_dump_iter_arg_t; -struct prof_tdata_dump_iter_arg_s { - write_cb_t *prof_dump_write; - void *cbopaque; -}; - static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *opaque) { @@ -783,7 +777,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } - prof_tdata_dump_iter_arg_t *arg = (prof_tdata_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", tdata->thr_uid); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, @@ -804,7 +798,7 @@ 
prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); prof_dump_write(cbopaque, "\n"); - prof_tdata_dump_iter_arg_t arg = {prof_dump_write, cbopaque}; + prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); malloc_mutex_unlock(tsdn, &tdatas_mtx); @@ -837,7 +831,7 @@ prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); prof_dump_write(cbopaque, "\n"); - prof_tctx_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; + prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); } @@ -874,16 +868,9 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #endif } -typedef struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg_t; -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - write_cb_t *prof_dump_write; - void *cbopaque; -}; - static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_dump_iter_arg_t *arg = (prof_gctx_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, &gctx->bt, gctxs); @@ -944,10 +931,9 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, &prof_gctx_merge_iter_arg, &gctxs); prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, &prof_tdata_merge_iter_arg.cnt_all); - prof_gctx_dump_iter_arg_t prof_gctx_dump_iter_arg = {tsd_tsdn(tsd), + prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), prof_dump_write, cbopaque}; - gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - &prof_gctx_dump_iter_arg); + gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, -- cgit v0.12 From d4259ea53bb842169688f5fcda1053fbbaf021a8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:52:05 -0700 Subject: Simplify signatures for prof dump functions --- include/jemalloc/internal/prof_data.h | 3 +- src/prof_data.c | 109 ++++++++++++++++------------------ test/unit/prof_reset.c | 3 +- 3 files changed, 54 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index a0448d0..039c2a8 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -28,8 +28,7 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. 
*/ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(tsdn_t *, write_cb_t *, void *, - const prof_cnt_t *); +typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index 8cf7228..1d50140 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -664,8 +664,8 @@ prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t; struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; + tsdn_t *tsdn; + size_t *leak_ngctx; }; static prof_gctx_t * @@ -676,7 +676,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; + (*arg->leak_ngctx)++; } malloc_mutex_unlock(arg->tsdn, gctx->lock); @@ -731,8 +731,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; + tsdn_t *tsdn; + prof_cnt_t *cnt_all; }; static prof_tdata_t * @@ -756,11 +756,12 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); } - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all->accumbytes += + tdata->cnt_summed.accumbytes; } } else { tdata->dumping = false; @@ -791,25 +792,24 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static void -prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, - void *cbopaque, const prof_cnt_t *cnt_all) { - prof_dump_printf(prof_dump_write, cbopaque, +prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); - prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); - prof_dump_write(cbopaque, "\n"); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); + arg->prof_dump_write(arg->cbopaque, "\n"); - prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(arg->tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg); + malloc_mutex_unlock(arg->tsdn, &tdatas_mtx); } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void -prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, - prof_gctx_t *gctx, const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { +prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); + 
malloc_mutex_assert_owner(arg->tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -821,18 +821,18 @@ prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, return; } - prof_dump_write(cbopaque, "@"); + arg->prof_dump_write(arg->cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { - prof_dump_printf(prof_dump_write, cbopaque, " %#"FMTxPTR, - (uintptr_t)bt->vec[i]); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - prof_dump_write(cbopaque, "\n t*: "); - prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); - prof_dump_write(cbopaque, "\n"); + arg->prof_dump_write(arg->cbopaque, "\n t*: "); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &gctx->cnt_summed); + arg->prof_dump_write(arg->cbopaque, "\n"); - prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg); } /* @@ -872,17 +872,14 @@ static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); - prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, - &gctx->bt, gctxs); + prof_dump_gctx(arg, gctx, &gctx->bt, gctxs); malloc_mutex_unlock(arg->tsdn, gctx->lock); return NULL; } static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - prof_tdata_merge_iter_arg_t *prof_tdata_merge_iter_arg, - prof_gctx_merge_iter_arg_t *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, + size_t *leak_ngctx, prof_gctx_tree_t *gctxs) { size_t tabind; union { prof_gctx_t *p; @@ -904,18 +901,20 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + memset(cnt_all, 0, sizeof(prof_cnt_t)); + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd), + cnt_all}; malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); + &prof_tdata_merge_iter_arg); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. 
*/ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; + *leak_ngctx = 0; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd), + leak_ngctx}; gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); + &prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); } @@ -924,20 +923,17 @@ void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, - &prof_tdata_merge_iter_arg.cnt_all); + prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), prof_dump_write, cbopaque}; + prof_dump_header(&prof_dump_iter_arg, &cnt_all); gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx); + prof_leakcheck(&cnt_all, leak_ngctx); } } @@ -947,8 +943,8 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { tsd_t *tsd; prof_tdata_t *tdata; - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; tsd = tsd_fetch(); @@ -969,21 +965,20 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, return; } - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); + prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); prof_gctx_finish(tsd, &gctxs); if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + *curobjs = cnt_all.curobjs; } if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + *curbytes = cnt_all.curbytes; } if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + *accumobjs = cnt_all.accumobjs; } if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + *accumbytes = cnt_all.accumbytes; } } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 5916bd1..8c82e6d 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -87,8 +87,7 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static void -prof_dump_header_intercept(tsdn_t *tsdn, write_cb_t *cb, void *cbopaque, - const prof_cnt_t *cnt_all) { +prof_dump_header_intercept(void *opaque, const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); } -- cgit v0.12 From 80d18c18c9a39e534ecb080256cb00e652f3d863 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 15:26:55 -0700 Subject: Pass prof dump parameters explicitly in prof_sys --- src/prof_sys.c | 108 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/prof_sys.c b/src/prof_sys.c index 5895ec4..4897988 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -37,28 +37,6 @@ static char *prof_dump_prefix = NULL; /* The fallback 
allocator profiling functionality will use. */ base_t *prof_base; -/* The following are needed for dumping and are protected by prof_dump_mtx. */ -/* - * Whether there has been an error in the dumping process, which could have - * happened either in file opening or in file writing. When an error has - * already occurred, we will stop further writing to the file. - */ -static bool prof_dump_error; -/* - * Whether error should be handled locally: if true, then we print out error - * message as well as abort (if opt_abort is true) when an error occurred, and - * we also report the error back to the caller in the end; if false, then we - * only report the error back to the caller in the end. - */ -static bool prof_dump_handle_error_locally; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static buf_writer_t prof_dump_buf_writer; -static int prof_dump_fd; - void bt_init(prof_bt_t *bt, void **vec) { cassert(config_prof); @@ -337,15 +315,42 @@ prof_getpid(void) { #endif } +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps; protected by prof_dump_mtx. + */ +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; + +typedef struct prof_dump_arg_s prof_dump_arg_t; +struct prof_dump_arg_s { + /* + * Whether error should be handled locally: if true, then we print out + * error message as well as abort (if opt_abort is true) when an error + * occurred, and we also report the error back to the caller in the end; + * if false, then we only report the error back to the caller in the + * end. + */ + const bool handle_error_locally; + /* + * Whether there has been an error in the dumping process, which could + * have happened either in file opening or in file writing. When an + * error has already occurred, we will stop further writing to the file. + */ + bool error; + /* File descriptor of the dump file. */ + int prof_dump_fd; +}; + static void -prof_dump_check_possible_error(bool err_cond, const char *format, ...) { - assert(!prof_dump_error); +prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, + const char *format, ...) 
{ + assert(!arg->error); if (!err_cond) { return; } - prof_dump_error = true; - if (!prof_dump_handle_error_locally) { + arg->error = true; + if (!arg->handle_error_locally) { return; } @@ -369,29 +374,30 @@ prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = prof_dump_open_file_impl; static void -prof_dump_open(const char *filename) { - prof_dump_fd = prof_dump_open_file(filename, 0644); - prof_dump_check_possible_error(prof_dump_fd == -1, +prof_dump_open(prof_dump_arg_t *arg, const char *filename) { + arg->prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(arg, arg->prof_dump_fd == -1, ": failed to open \"%s\"\n", filename); } prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; static void -prof_dump_flush(void *cbopaque, const char *s) { +prof_dump_flush(void *opaque, const char *s) { cassert(config_prof); - assert(cbopaque == NULL); - if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, s, strlen(s)); - prof_dump_check_possible_error(err == -1, + prof_dump_arg_t *arg = (prof_dump_arg_t *)opaque; + if (!arg->error) { + ssize_t err = prof_dump_write_file(arg->prof_dump_fd, s, + strlen(s)); + prof_dump_check_possible_error(arg, err == -1, ": failed to write during heap profile flush\n"); } } static void -prof_dump_close() { - if (prof_dump_fd != -1) { - close(prof_dump_fd); +prof_dump_close(prof_dump_arg_t *arg) { + if (arg->prof_dump_fd != -1) { + close(arg->prof_dump_fd); } } @@ -450,14 +456,14 @@ prof_dump_read_maps_cb(void *read_cbopaque, void *buf, size_t limit) { } static void -prof_dump_maps() { +prof_dump_maps(buf_writer_t *buf_writer) { int mfd = prof_dump_open_maps(); if (mfd == -1) { return; } - buf_writer_cb(&prof_dump_buf_writer, "\nMAPPED_LIBRARIES:\n"); - buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); + buf_writer_cb(buf_writer, "\nMAPPED_LIBRARIES:\n"); + buf_writer_pipe(buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -472,26 +478,26 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, return true; } - prof_dump_error = false; - prof_dump_handle_error_locally = !propagate_err; + prof_dump_arg_t arg = {/* handle_error_locally */ !propagate_err, + /* error */ false, /* prof_dump_fd */ -1}; pre_reentrancy(tsd, NULL); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - prof_dump_open(filename); - bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, - prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); + prof_dump_open(&arg, filename); + buf_writer_t buf_writer; + bool err = buf_writer_init(tsd_tsdn(tsd), &buf_writer, prof_dump_flush, + &arg, prof_dump_buf, PROF_DUMP_BUFSIZE); assert(!err); - prof_dump_impl(tsd, buf_writer_cb, &prof_dump_buf_writer, tdata, - leakcheck); - prof_dump_maps(); - buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); - prof_dump_close(); + prof_dump_impl(tsd, buf_writer_cb, &buf_writer, tdata, leakcheck); + prof_dump_maps(&buf_writer); + buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); + prof_dump_close(&arg); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); post_reentrancy(tsd); - return prof_dump_error; + return arg.error; } /* -- cgit v0.12 From f58ebdff7a82ed68f3bc007b0d10ed02ba3d065a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 26 Jun 2020 14:56:17 -0700 Subject: Generalize prof_cnt_all() for testing --- include/jemalloc/internal/prof_data.h | 3 +-- src/prof_data.c | 48 +++++++---------------------------- test/unit/prof_tctx.c | 16 ++++++------ 3 files changed, 18 
insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 039c2a8..bf6e480 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -30,7 +30,6 @@ size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); +void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/src/prof_data.c b/src/prof_data.c index 1d50140..ee022cc 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -939,46 +939,16 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, /* Used in unit tests. */ void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - prof_cnt_t cnt_all; - size_t leak_ngctx; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); +prof_cnt_all(prof_cnt_t *cnt_all) { + tsd_t *tsd = tsd_fetch(); + prof_tdata_t *tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = cnt_all.accumbytes; + memset(cnt_all, 0, sizeof(prof_cnt_t)); + } else { + size_t leak_ngctx; + prof_gctx_tree_t gctxs; + prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs); + prof_gctx_finish(tsd, &gctxs); } } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 801e5f7..e0efdc3 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -7,21 +7,21 @@ TEST_BEGIN(test_prof_realloc) { int flags; void *p, *q; prof_info_t prof_info_p, prof_info_q; - uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; + prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; test_skip_if(!config_prof); tsd = tsd_fetch(); flags = MALLOCX_TCACHE_NONE; - prof_cnt_all(&curobjs_0, NULL, NULL, NULL); + prof_cnt_all(&cnt_0); p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); - prof_cnt_all(&curobjs_1, NULL, NULL, NULL); - expect_u64_eq(curobjs_0 + 1, curobjs_1, + prof_cnt_all(&cnt_1); + expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, "Allocation should have increased sample size"); q = rallocx(p, 2048, flags); @@ -30,13 +30,13 @@ TEST_BEGIN(test_prof_realloc) { prof_info_get(tsd, q, NULL, &prof_info_q); expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); - prof_cnt_all(&curobjs_2, NULL, NULL, NULL); - expect_u64_eq(curobjs_1, curobjs_2, + prof_cnt_all(&cnt_2); + expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, "Reallocation should not have changed sample size"); dallocx(q, flags); - prof_cnt_all(&curobjs_3, NULL, NULL, NULL); - expect_u64_eq(curobjs_0, curobjs_3, + prof_cnt_all(&cnt_3); + expect_u64_eq(cnt_0.curobjs, cnt_3.curobjs, 
"Sample size should have returned to base level"); } TEST_END -- cgit v0.12 From c2e7a063923f43b66a58815ff85f9fcf1681cc76 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 26 Jun 2020 15:26:51 -0700 Subject: No need to intercept prof_dump_header() in tests --- include/jemalloc/internal/prof_data.h | 2 -- src/prof_data.c | 4 +--- test/unit/prof_reset.c | 31 ++++++------------------------- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index bf6e480..e2e4aed 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -28,8 +28,6 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/src/prof_data.c b/src/prof_data.c index ee022cc..6b441de 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -792,8 +792,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static void -prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { - prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; +prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); @@ -803,7 +802,6 @@ prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg); malloc_mutex_unlock(arg->tsdn, &tdatas_mtx); } -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 8c82e6d..a0fb038 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -84,45 +84,26 @@ TEST_BEGIN(test_prof_reset_basic) { } TEST_END -bool prof_dump_header_intercepted = false; -prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; -static void -prof_dump_header_intercept(void *opaque, const prof_cnt_t *cnt_all) { - prof_dump_header_intercepted = true; - memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); -} - TEST_BEGIN(test_prof_reset_cleanup) { - void *p; - prof_dump_header_t *prof_dump_header_orig; - test_skip_if(!config_prof); set_prof_active(true); expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); - p = mallocx(1, 0); + void *p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); - prof_dump_header_orig = prof_dump_header; - prof_dump_header = prof_dump_header_intercept; - expect_false(prof_dump_header_intercepted, "Unexpected intercept"); - - expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), - 0, "Unexpected error while dumping heap profile"); - expect_true(prof_dump_header_intercepted, "Expected intercept"); - expect_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); + prof_cnt_t cnt_all; + prof_cnt_all(&cnt_all); + expect_u64_eq(cnt_all.curobjs, 1, "Expected 1 allocation"); expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); - expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), - 0, "Unexpected error while dumping heap 
profile"); - expect_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); + prof_cnt_all(&cnt_all); + expect_u64_eq(cnt_all.curobjs, 0, "Expected 0 allocations"); expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); - prof_dump_header = prof_dump_header_orig; - dallocx(p, 0); expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); -- cgit v0.12 From 00f06c9beb2509fba2133677c17ec702446b2102 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 4 Jul 2020 16:09:27 +0100 Subject: enabling mpss on solaris/illumos. reusing slighty linux configuration as possible, aligning the address range to HUGEPAGE. --- configure.ac | 8 ++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/jemalloc_preamble.h.in | 8 ++++++++ src/jemalloc.c | 2 +- src/pages.c | 15 ++++++++++++--- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index d9fdebd..bcd6363 100644 --- a/configure.ac +++ b/configure.ac @@ -1879,6 +1879,14 @@ if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork" fi +AC_CHECK_FUNC([memcntl], + [have_memcntl="1"], + [have_memcntl="0"], + ) +if test "x$have_memcntl" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 83e733e..0aef0bb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -301,6 +301,11 @@ */ #undef JEMALLOC_THP +/* + * Defined if memcntl page admin call is supported + */ +#undef JEMALLOC_HAVE_MEMCNTL + /* Define if operating system has alloca.h header. 
*/ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 66302ab..740fcfc 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -217,4 +217,12 @@ static const bool config_high_res_timer = #endif ; +static const bool have_memcntl = +#ifdef JEMALLOC_HAVE_MEMCNTL + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index b468d82..9b5ce68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1533,7 +1533,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], for (int i = 0; i < thp_mode_names_limit; i++) { if (strncmp(thp_mode_names[i],v, vlen) == 0) { - if (!have_madvise_huge) { + if (!have_madvise_huge && !have_memcntl) { CONF_ERROR( "No THP support", k, klen, v, vlen); diff --git a/src/pages.c b/src/pages.c index 9413d87..0ddc5ba 100644 --- a/src/pages.c +++ b/src/pages.c @@ -363,8 +363,13 @@ pages_huge_impl(void *addr, size_t size, bool aligned) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); } -#ifdef JEMALLOC_HAVE_MADVISE_HUGE +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#elif defined(JEMALLOC_HAVE_MEMCNTL) + struct memcntl_mha m = {0}; + m.mha_cmd = MHA_MAPSIZE_VA; + m.mha_pagesize = HUGEPAGE; + return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0); #else return true; #endif @@ -561,14 +566,14 @@ pages_set_thp_state (void *ptr, size_t size) { static void init_thp_state(void) { - if (!have_madvise_huge) { + if (!have_madvise_huge && !have_memcntl) { if (metadata_thp_enabled() && opt_abort) { malloc_write(": no MADV_HUGEPAGE support\n"); abort(); } goto label_error; } - +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) static const char sys_state_madvise[] = "always [madvise] never\n"; static const char sys_state_always[] = "[always] madvise never\n"; static const char sys_state_never[] = "always madvise [never]\n"; @@ -608,6 +613,10 @@ init_thp_state(void) { goto label_error; } return; +#elif defined(JEMALLOC_HAVE_MEMCNTL) + init_system_thp_mode = thp_mode_default; + return; +#endif label_error: opt_thp = init_system_thp_mode = thp_mode_not_supported; } -- cgit v0.12 From 129b72705833658d87886781347548e0261fcaeb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 15:16:38 -0700 Subject: Add typed-list module. This gives some named convenience wrappers. --- include/jemalloc/internal/edata.h | 42 +++--------------------------- include/jemalloc/internal/typed_list.h | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 39 deletions(-) create mode 100644 include/jemalloc/internal/typed_list.h diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index ac8d647..58bddd1 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/typed_list.h" enum extent_state_e { extent_state_active = 0, @@ -58,7 +59,6 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ typedef struct edata_s edata_t; -typedef ql_head(edata_t) edata_list_t; typedef ph(edata_t) edata_tree_t; typedef ph(edata_t) edata_heap_t; struct edata_s { @@ -209,6 +209,8 @@ struct edata_s { }; }; +TYPED_LIST(edata_list, edata_t, ql_link) + static inline unsigned edata_arena_ind_get(const edata_t *edata) { unsigned arena_ind = (unsigned)((edata->e_bits & @@ -531,7 +533,6 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); edata_ranged_set(edata, ranged); - ql_elm_new(edata, ql_link); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); @@ -552,43 +553,6 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_ranged_set(edata, false); } -static inline void -edata_list_init(edata_list_t *list) { - ql_new(list); -} - -static inline edata_t * -edata_list_first(const edata_list_t *list) { - return ql_first(list); -} - -static inline edata_t * -edata_list_last(const edata_list_t *list) { - return ql_last(list, ql_link); -} - -static inline void -edata_list_append(edata_list_t *list, edata_t *edata) { - ql_tail_insert(list, edata, ql_link); -} - -static inline void -edata_list_prepend(edata_list_t *list, edata_t *edata) { - ql_head_insert(list, edata, ql_link); -} - -static inline void -edata_list_replace(edata_list_t *list, edata_t *to_remove, - edata_t *to_insert) { - ql_after_insert(to_remove, to_insert, ql_link); - ql_remove(list, to_remove, ql_link); -} - -static inline void -edata_list_remove(edata_list_t *list, edata_t *edata) { - ql_remove(list, edata, ql_link); -} - static inline int edata_sn_comp(const edata_t *a, const edata_t *b) { size_t a_sn = edata_sn_get(a); diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h new file mode 100644 index 0000000..7ad2237 --- /dev/null +++ b/include/jemalloc/internal/typed_list.h @@ -0,0 +1,47 @@ +#ifndef JEMALLOC_INTERNAL_TYPED_LIST_H +#define JEMALLOC_INTERNAL_TYPED_LIST_H + +/* + * This wraps the ql module to implement a list class in a way that's a little + * bit easier to use; it handles ql_elm_new calls and provides type safety. 
+ */ + +#define TYPED_LIST(list_type, el_type, linkage) \ +typedef struct { \ + ql_head(el_type) head; \ +} list_type##_t; \ +static inline void \ +list_type##_init(list_type##_t *list) { \ + ql_new(&list->head); \ +} \ +static inline el_type * \ +list_type##_first(const list_type##_t *list) { \ + return ql_first(&list->head); \ +} \ +static inline el_type * \ +list_type##_last(const list_type##_t *list) { \ + return ql_last(&list->head, linkage); \ +} \ +static inline void \ +list_type##_append(list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_tail_insert(&list->head, item, linkage); \ +} \ +static inline void \ +list_type##_prepend(list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_head_insert(&list->head, item, linkage); \ +} \ +static inline void \ +list_type##_replace(list_type##_t *list, el_type *to_remove, \ + el_type *to_insert) { \ + ql_elm_new(to_insert, linkage); \ + ql_after_insert(to_remove, to_insert, linkage); \ + ql_remove(&list->head, to_remove, linkage); \ +} \ +static inline void \ +list_type##_remove(list_type##_t *list, el_type *item) { \ + ql_remove(&list->head, item, linkage); \ +} + +#endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ -- cgit v0.12 From 392f645f4d850d2256443299183123258899bb3e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 15:15:51 -0700 Subject: Edata: split up different list linkage uses. --- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/edata.h | 37 ++++++++++++++++++------------- include/jemalloc/internal/edata_cache.h | 2 +- include/jemalloc/internal/eset.h | 2 +- src/arena.c | 14 ++++++------ src/bin.c | 2 +- src/edata_cache.c | 10 ++++----- src/eset.c | 6 ++--- src/extent.c | 2 +- src/large.c | 6 ++--- src/pa.c | 17 +++++++------- 12 files changed, 55 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 0c3f42f..baa7031 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -69,7 +69,7 @@ struct arena_s { * * Synchronization: large_mtx. */ - edata_list_t large; + edata_list_active_t large; /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 9a774e9..9241ee7 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -32,7 +32,7 @@ struct bin_s { edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - edata_list_t slabs_full; + edata_list_active_t slabs_full; /* Bin statistics. */ bin_stats_t stats; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 58bddd1..fb0b489 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -185,22 +185,28 @@ struct edata_s { size_t e_bsize; }; - /* - * List linkage, used by a variety of lists: - * - bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(edata_t) ql_link; - - /* - * Linkage for per size class sn/address-ordered heaps, and - * for extent_avail - */ - phn(edata_t) ph_link; + union { + /* + * List linkage used when the edata_t is active; either in + * arena's large allocations or bin_t's slabs_full. + */ + ql_elm(edata_t) ql_link_active; + /* + * Pairing heap linkage. 
Used whenever the extent is inactive + * (in the page allocators), or when it is active and in + * slabs_nonfull, or when the edata_t is unassociated with an + * extent and sitting in an edata_cache. + */ + phn(edata_t) ph_link; + }; union { + /* + * List linkage used when the extent is inactive: + * - Stashed dirty extents + * - Ecache LRU functionality. + */ + ql_elm(edata_t) ql_link_inactive; /* Small region slab metadata. */ slab_data_t e_slab_data; @@ -209,7 +215,8 @@ struct edata_s { }; }; -TYPED_LIST(edata_list, edata_t, ql_link) +TYPED_LIST(edata_list_active, edata_t, ql_link_active) +TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive) static inline unsigned edata_arena_ind_get(const edata_t *edata) { diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 620360d..02685c8 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -27,7 +27,7 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); typedef struct edata_cache_small_s edata_cache_small_t; struct edata_cache_small_s { - edata_list_t list; + edata_list_inactive_t list; size_t count; edata_cache_t *fallback; }; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index e29179d..d260bc1 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -25,7 +25,7 @@ struct eset_s { bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; /* LRU of all extents in heaps. */ - edata_list_t lru; + edata_list_inactive_t lru; /* Page sum for all extents in heaps. */ atomic_zu_t npages; diff --git a/src/arena.c b/src/arena.c index 2a3af5c..573dde9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -594,7 +594,7 @@ arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - edata_list_append(&bin->slabs_full, slab); + edata_list_active_append(&bin->slabs_full, slab); } static void @@ -602,7 +602,7 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - edata_list_remove(&bin->slabs_full, slab); + edata_list_active_remove(&bin->slabs_full, slab); } static void @@ -622,8 +622,8 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = edata_list_first(&bin->slabs_full); slab != NULL; - slab = edata_list_first(&bin->slabs_full)) { + for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; + slab = edata_list_active_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -655,8 +655,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. 
*/ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (edata_t *edata = edata_list_first(&arena->large); edata != - NULL; edata = edata_list_first(&arena->large)) { + for (edata_t *edata = edata_list_active_first(&arena->large); + edata != NULL; edata = edata_list_active_first(&arena->large)) { void *ptr = edata_base_get(edata); size_t usize; @@ -1465,7 +1465,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); - edata_list_init(&arena->large); + edata_list_active_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/bin.c b/src/bin.c index 52de9ff..fa20458 100644 --- a/src/bin.c +++ b/src/bin.c @@ -46,7 +46,7 @@ bin_init(bin_t *bin) { } bin->slabcur = NULL; edata_heap_new(&bin->slabs_nonfull); - edata_list_init(&bin->slabs_full); + edata_list_active_init(&bin->slabs_full); if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } diff --git a/src/edata_cache.c b/src/edata_cache.c index 4601f33..d899ce8 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -59,7 +59,7 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { - edata_list_init(&ecs->list); + edata_list_inactive_init(&ecs->list); ecs->count = 0; ecs->fallback = fallback; } @@ -67,9 +67,9 @@ edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { edata_t * edata_cache_small_get(edata_cache_small_t *ecs) { assert(ecs->count > 0); - edata_t *edata = edata_list_first(&ecs->list); + edata_t *edata = edata_list_inactive_first(&ecs->list); assert(edata != NULL); - edata_list_remove(&ecs->list, edata); + edata_list_inactive_remove(&ecs->list, edata); ecs->count--; return edata; } @@ -77,7 +77,7 @@ edata_cache_small_get(edata_cache_small_t *ecs) { void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { assert(edata != NULL); - edata_list_append(&ecs->list, edata); + edata_list_inactive_append(&ecs->list, edata); ecs->count++; } @@ -93,7 +93,7 @@ bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, if (edata == NULL) { return true; } - ql_elm_new(edata, ql_link); + ql_elm_new(edata, ql_link_inactive); edata_cache_small_put(ecs, edata); } return false; diff --git a/src/eset.c b/src/eset.c index c4e39d2..c9af80e 100644 --- a/src/eset.c +++ b/src/eset.c @@ -12,7 +12,7 @@ eset_init(eset_t *eset, extent_state_t state) { edata_heap_new(&eset->heaps[i]); } bitmap_init(eset->bitmap, &eset_bitmap_info, true); - edata_list_init(&eset->lru); + edata_list_inactive_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; } @@ -65,7 +65,7 @@ eset_insert(eset_t *eset, edata_t *edata) { eset_stats_add(eset, pind, size); } - edata_list_append(&eset->lru, edata); + edata_list_inactive_append(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -95,7 +95,7 @@ eset_remove(eset_t *eset, edata_t *edata) { bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - edata_list_remove(&eset->lru, edata); + edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * As in eset_insert, we hold eset->mtx and so don't need atomic diff --git a/src/extent.c b/src/extent.c index 073f806..d6349c3 100644 --- a/src/extent.c +++ b/src/extent.c @@ 
-139,7 +139,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - edata = edata_list_first(&ecache->eset.lru); + edata = edata_list_inactive_first(&ecache->eset.lru); if (edata == NULL) { goto label_return; } diff --git a/src/large.c b/src/large.c index 3ea08be..42d2fd7 100644 --- a/src/large.c +++ b/src/large.c @@ -43,7 +43,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (!arena_is_auto(arena)) { /* Insert edata into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); - edata_list_append(&arena->large, edata); + edata_list_active_append(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } @@ -225,14 +225,14 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); - edata_list_remove(&arena->large, edata); + edata_list_active_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } } else { /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { malloc_mutex_assert_owner(tsdn, &arena->large_mtx); - edata_list_remove(&arena->large, edata); + edata_list_active_remove(&arena->large, edata); } } arena_extent_dalloc_large_prep(tsdn, arena, edata); diff --git a/src/pa.c b/src/pa.c index a7fe70f..50c64b4 100644 --- a/src/pa.c +++ b/src/pa.c @@ -239,7 +239,8 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, static size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { + size_t npages_limit, size_t npages_decay_max, + edata_list_inactive_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -252,7 +253,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(result, edata); + edata_list_inactive_append(result, edata); nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; @@ -261,7 +262,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents) { + edata_list_inactive_t *decay_extents) { bool err; size_t nmadvise = 0; @@ -272,9 +273,9 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - edata_list_remove(decay_extents, edata); + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { + edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); size_t npages = size >> LG_PAGE; @@ -342,8 +343,8 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - edata_list_t decay_extents; - edata_list_init(&decay_extents); + edata_list_inactive_t decay_extents; + edata_list_inactive_init(&decay_extents); size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { -- cgit v0.12 From 
ae541d3fabd679c97326e81b652fa3979e734404 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 17:16:10 -0700 Subject: Edata: Reserve some space for hugepages. --- include/jemalloc/internal/edata.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index fb0b489..bb7da1d 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -185,6 +185,18 @@ struct edata_s { size_t e_bsize; }; + /* + * Reserved for hugepages -- once that allocator is more settled, we + * might be able to claw some of this back. Until then, don't get any + * funny ideas about using the space we just freed up to keep some other + * bit of metadata around. That kind of thinking can be hazardous to + * your health. + * + * This keeps the size of an edata_t at exactly 128 bytes on + * architectures with 8-byte pointers and 4k pages. + */ + void *reserved1, *reserved2; + union { /* * List linkage used when the edata_t is active; either in -- cgit v0.12 From f1f4ec315a1831612f6d66b62be55a323fa94312 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 8 Jul 2020 15:50:23 -0700 Subject: Tcache: Tweak nslots_max tuning parameter. In making these settings configurable, 634afc4124100b5ff11e892481d912d56099be1a unintentionally changed a tuning parameter (reducing the "goal" max by a factor of 4). This commit undoes that change. --- src/tcache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index b73fd0d..a33d9e2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -25,12 +25,12 @@ unsigned opt_tcache_nslots_large = 20; /* * We attempt to make the number of slots in a tcache bin for a given size class * equal to the number of objects in a slab times some multiplier. By default, - * the multiplier is 1/2 (i.e. we set the maximum number of objects in the - * tcache to half the number of objects in a slab). + * the multiplier is 2 (i.e. we set the maximum number of objects in the tcache + * to twice the number of objects in a slab). * This is bounded by some other constraints as well, like the fact that it * must be even, must be less than opt_tcache_nslots_small_max, etc.. */ -ssize_t opt_lg_tcache_nslots_mul = -1; +ssize_t opt_lg_tcache_nslots_mul = 1; /* * Number of allocation bytes between tcache incremental GCs.
Again, this -- cgit v0.12 From 3cf19c6e5e8b49c3bbf84bbfeb9ab49b38f0546c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 13:21:41 -0700 Subject: atomic: add atomic_load_sub_store --- include/jemalloc/internal/atomic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index e5afb20..c0f7312 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -63,6 +63,13 @@ type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ type newval = oldval + inc; \ atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } \ + ATOMIC_INLINE void \ + atomic_load_sub_store_##short_type(atomic_##short_type##_t *a, \ + type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval - inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ } /* -- cgit v0.12 From 1b5f632e0fbb28d162fbf70d1032434787269f1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 15:02:19 -0700 Subject: Introduce PAI: Page allocator interface --- include/jemalloc/internal/pa.h | 8 +++ include/jemalloc/internal/pai.h | 45 +++++++++++++++ src/pa.c | 123 ++++++++++++++++++++++++++++++---------- 3 files changed, 145 insertions(+), 31 deletions(-) create mode 100644 include/jemalloc/internal/pai.h diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 172c549..83fcc4d 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,6 +7,7 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" +#include "jemalloc/internal/pai.h" enum pa_decay_purge_setting_e { PA_DECAY_PURGE_ALWAYS, @@ -111,6 +112,13 @@ struct pa_shard_s { atomic_zu_t nactive; /* + * An interface for page allocation from the ecache framework (i.e. a + * cascade of ecache_dirty, ecache_muzzy, ecache_retained). Right now + * this is the *only* pai, but we'll soon grow another. + */ + pai_t ecache_pai; + + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. * diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h new file mode 100644 index 0000000..45edd69 --- /dev/null +++ b/include/jemalloc/internal/pai.h @@ -0,0 +1,45 @@ +#ifndef JEMALLOC_INTERNAL_PAI_H +#define JEMALLOC_INTERNAL_PAI_H + +/* An interface for page allocation. */ + +typedef struct pai_s pai_t; +struct pai_s { + /* Returns NULL on failure. */ + edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); + bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); + bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); + void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); +}; + +/* + * These are just simple convenience functions to avoid having to reference the + * same pai_t twice on every invocation. 
+ */ + +static inline edata_t * +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + return self->alloc(tsdn, self, size, alignment, zero); +} + +static inline bool +pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + return self->expand(tsdn, self, edata, old_size, new_size, zero); +} + +static inline bool +pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + return self->shrink(tsdn, self, edata, old_size, new_size); +} + +static inline void +pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + self->dalloc(tsdn, self, edata); +} + +#endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/src/pa.c b/src/pa.c index 50c64b4..7a0052e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +static edata_t *ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -71,6 +79,11 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->emap = emap; shard->base = base; + shard->ecache_pai.alloc = &ecache_pai_alloc; + shard->ecache_pai.expand = &ecache_pai_expand; + shard->ecache_pai.shrink = &ecache_pai_shrink; + shard->ecache_pai.dalloc = &ecache_pai_dalloc; + return false; } @@ -110,13 +123,11 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { return pa_shard_muzzy_decay_ms_get(shard) != 0; } -edata_t * -pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool zero) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - size_t mapped_add = 0; +static edata_t * +ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, @@ -129,14 +140,25 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, zero); - mapped_add = size; + if (config_stats && edata != NULL) { + atomic_fetch_add_zu(&shard->stats->pa_mapped, size, + ATOMIC_RELAXED); + } } + return edata; +} + +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool zero) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = pai_alloc(tsdn, &shard->ecache_pai, size, alignment, + zero); + if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, - mapped_add, ATOMIC_RELAXED); - } emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); edata_slab_set(edata, slab); @@ -147,18 +169,17 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, return edata; } 
-bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero) { - assert(new_size > old_size); - assert(edata_size_get(edata) == old_size); - assert((new_size & PAGE_MASK) == 0); +static bool +ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); - size_t expand_amount = new_size - old_size; size_t mapped_add = 0; + size_t expand_amount = new_size - old_size; if (ehooks_merge_will_fail(ehooks)) { return true; @@ -186,22 +207,39 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } - pa_nactive_add(shard, expand_amount >> LG_PAGE); - edata_szind_set(edata, szind); - emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } bool -pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool *generated_dirty) { - assert(new_size < old_size); +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool zero) { + assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); - size_t shrink_amount = old_size - new_size; + + size_t expand_amount = new_size - old_size; + + bool error = pai_expand(tsdn, &shard->ecache_pai, edata, old_size, + new_size, zero); + if (error) { + return true; + } + + pa_nactive_add(shard, expand_amount >> LG_PAGE); + edata_szind_set(edata, szind); + emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); + return false; +} + +static bool +ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - *generated_dirty = false; + size_t shrink_amount = old_size - new_size; + if (ehooks_split_will_fail(ehooks)) { return true; @@ -212,9 +250,25 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + return false; +} + +bool +pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool *generated_dirty) { + assert(new_size < old_size); + assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); + size_t shrink_amount = old_size - new_size; + + *generated_dirty = false; + bool error = pai_shrink(tsdn, &shard->ecache_pai, edata, old_size, + new_size); + if (error) { + return true; + } + pa_nactive_sub(shard, shrink_amount >> LG_PAGE); *generated_dirty = true; edata_szind_set(edata, szind); @@ -222,6 +276,14 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } +static void +ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); +} + void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { 
@@ -232,8 +294,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + pai_dalloc(tsdn, &shard->ecache_pai, edata); *generated_dirty = true; } -- cgit v0.12 From 777b0ba9655f6b40b19a8a9c485c186ce9adb551 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 16:57:31 -0700 Subject: Add PAC: Page allocator classic. For now, this is just a stub containing the ecaches, with no surrounding code changed. Eventually all the core allocator bits will be moved in, in the subsequent stack of commits. --- include/jemalloc/internal/pa.h | 27 ++++++++++--------------- include/jemalloc/internal/pac.h | 25 +++++++++++++++++++++++ src/arena.c | 16 +++++++-------- src/background_thread.c | 8 ++++---- src/ctl.c | 6 +++--- src/extent.c | 22 ++++++++++---------- src/pa.c | 39 ++++++++++++++++++----------------- src/pa_extra.c | 45 +++++++++++++++++++++-------------------- test/unit/pa.c | 2 +- 9 files changed, 106 insertions(+), 84 deletions(-) create mode 100644 include/jemalloc/internal/pac.h diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 83fcc4d..d7f2263 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,8 +7,17 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" +#include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" +/* + * The page allocator; responsible for acquiring pages of memory for + * allocations. It picks the implementation of the page allocator interface + * (i.e. a pai_t) to handle a given page-level allocation request. For now, the + * only such implementation is the PAC code ("page allocator classic"), but + * others will be coming soon. + */ + enum pa_decay_purge_setting_e { PA_DECAY_PURGE_ALWAYS, PA_DECAY_PURGE_NEVER, @@ -16,11 +25,6 @@ enum pa_decay_purge_setting_e { }; typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; -/* - * The page allocator; responsible for acquiring pages of memory for - * allocations. - */ - typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; struct pa_shard_decay_stats_s { /* Total number of purge sweeps. */ @@ -117,16 +121,7 @@ struct pa_shard_s { * this is the *only* pai, but we'll soon grow another. */ pai_t ecache_pai; - - /* - * Collections of extents that were previously allocated. These are - * used when allocating extents, in an attempt to re-use address space. - * - * Synchronization: internal. - */ - ecache_t ecache_dirty; - ecache_t ecache_muzzy; - ecache_t ecache_retained; + pac_t pac; /* The source of edata_t objects. */ edata_cache_t edata_cache; @@ -167,7 +162,7 @@ pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { - return ecache_npages_get(&shard->ecache_muzzy) == 0 && + return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && pa_shard_muzzy_decay_ms_get(shard) <= 0; } diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h new file mode 100644 index 0000000..73e672f --- /dev/null +++ b/include/jemalloc/internal/pac.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_PAC_H +#define JEMALLOC_INTERNAL_PAC_H + +/* + * Page allocator classic; an implementation of the PAI interface that: + * - Can be used for arenas with custom extent hooks. 
+ * - Can always satisfy any allocation request (including highly-fragmentary + * ones). + * - Can use efficient OS-level zeroing primitives for demand-filled pages. + */ + +typedef struct pac_s pac_t; +struct pac_s { + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; +}; + +#endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 573dde9..fb9cb7b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -454,16 +454,16 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, - &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, - decay_ms); + &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, - decay_ms); + &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.ecache_muzzy, decay_ms); } static bool @@ -521,8 +521,8 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, - &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, - is_background_thread, all); + &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } static bool @@ -532,8 +532,8 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, - is_background_thread, all); + &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } void diff --git a/src/background_thread.c b/src/background_thread.c index db11405..557dbc4 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -201,12 +201,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_dirty, &arena->pa_shard.ecache_dirty); + &arena->pa_shard.decay_dirty, &arena->pa_shard.pac.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_muzzy, &arena->pa_shard.ecache_muzzy); + &arena->pa_shard.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); return i1 < i2 ? 
i1 : i2; } @@ -716,8 +716,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.ecache_muzzy) > 0 || + (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 8b9f42e..0098d93 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3127,9 +3127,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->pa_shard.edata_cache.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_retained.mtx); MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index d6349c3..3d827b8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -647,7 +647,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, goto label_err; } - edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, + edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); @@ -673,11 +673,11 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, lead, true); + &shard->pac.ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, trail, true); + &shard->pac.ecache_retained, trail, true); } } else { /* @@ -690,12 +690,12 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, to_salvage, true); + &shard->pac.ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, - &shard->ecache_retained, to_leak, true); + &shard->pac.ecache_retained, to_leak, true); } goto label_err; } @@ -704,7 +704,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, edata, true); + &shard->pac.ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. 
*/ @@ -756,8 +756,8 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->ecache_retained, new_addr, size, alignment, zero, commit, - /* growing_retained */ true); + &shard->pac.ecache_retained, new_addr, size, alignment, zero, + commit, /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { @@ -792,7 +792,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, + edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); @@ -972,7 +972,7 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &shard->ecache_dirty); + assert(ecache == &shard->pac.ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { @@ -1072,7 +1072,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, shard, ehooks, &shard->ecache_retained, edata, + extent_record(tsdn, shard, ehooks, &shard->pac.ecache_retained, edata, false); } diff --git a/src/pa.c b/src/pa.c index 7a0052e..27fc9ee 100644 --- a/src/pa.c +++ b/src/pa.c @@ -32,7 +32,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (ecache_init(tsdn, &shard->ecache_dirty, extent_state_dirty, ind, + if (ecache_init(tsdn, &shard->pac.ecache_dirty, extent_state_dirty, ind, /* delay_coalesce */ true)) { return true; } @@ -40,7 +40,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &shard->ecache_muzzy, extent_state_muzzy, ind, + if (ecache_init(tsdn, &shard->pac.ecache_muzzy, extent_state_muzzy, ind, /* delay_coalesce */ false)) { return true; } @@ -50,7 +50,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (ecache_init(tsdn, &shard->ecache_retained, extent_state_retained, + if (ecache_init(tsdn, &shard->pac.ecache_retained, extent_state_retained, ind, /* delay_coalesce */ false)) { return true; } @@ -94,8 +94,8 @@ pa_shard_reset(pa_shard_t *shard) { void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { - assert(ecache_npages_get(&shard->ecache_dirty) == 0); - assert(ecache_npages_get(&shard->ecache_muzzy) == 0); + assert(ecache_npages_get(&shard->pac.ecache_dirty) == 0); + assert(ecache_npages_get(&shard->pac.ecache_muzzy) == 0); /* * Iterate over the retained extents and destroy them. 
This gives the * extent allocator underlying the extent hooks an opportunity to unmap @@ -108,7 +108,7 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata; while ((edata = ecache_evict(tsdn, shard, ehooks, - &shard->ecache_retained, 0)) != NULL) { + &shard->pac.ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, shard, ehooks, edata); } } @@ -131,15 +131,15 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, zero); + &shard->pac.ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, zero); + edata = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_muzzy, NULL, size, alignment, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, NULL, size, alignment, zero); + &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&shard->stats->pa_mapped, size, ATOMIC_RELAXED); @@ -184,16 +184,17 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (ehooks_merge_will_fail(ehooks)) { return true; } - edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, expand_amount, PAGE, zero); + edata_t *trail = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { - trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, expand_amount, PAGE, zero); + trail = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, + zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, trail_begin, expand_amount, PAGE, - zero); + &shard->pac.ecache_retained, trail_begin, expand_amount, + PAGE, zero); mapped_add = expand_amount; } if (trail == NULL) { @@ -250,7 +251,7 @@ ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, trail); return false; } @@ -281,7 +282,7 @@ ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { pa_shard_t *shard = (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, edata); } void @@ -353,7 +354,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, edata, /* offset */ 0, size); if (!err) { ecache_dalloc(tsdn, shard, ehooks, - &shard->ecache_muzzy, edata); + &shard->pac.ecache_muzzy, edata); break; } } diff --git a/src/pa_extra.c b/src/pa_extra.c index 1f90f7f..70ef19b 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -21,9 +21,9 @@ pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_prefork(tsdn, &shard->ecache_dirty); - ecache_prefork(tsdn, &shard->ecache_muzzy); - ecache_prefork(tsdn, &shard->ecache_retained); + ecache_prefork(tsdn, &shard->pac.ecache_dirty); + ecache_prefork(tsdn, 
&shard->pac.ecache_muzzy); + ecache_prefork(tsdn, &shard->pac.ecache_retained); } @@ -35,9 +35,9 @@ pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_postfork_parent(tsdn, &shard->edata_cache); - ecache_postfork_parent(tsdn, &shard->ecache_dirty); - ecache_postfork_parent(tsdn, &shard->ecache_muzzy); - ecache_postfork_parent(tsdn, &shard->ecache_retained); + ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); + ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); + ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); @@ -46,9 +46,9 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_postfork_child(tsdn, &shard->edata_cache); - ecache_postfork_child(tsdn, &shard->ecache_dirty); - ecache_postfork_child(tsdn, &shard->ecache_muzzy); - ecache_postfork_child(tsdn, &shard->ecache_retained); + ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); + ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); + ecache_postfork_child(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_child(tsdn, &shard->ecache_grow); malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); @@ -58,8 +58,8 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&shard->ecache_dirty); - *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); + *ndirty += ecache_npages_get(&shard->pac.ecache_dirty); + *nmuzzy += ecache_npages_get(&shard->pac.ecache_muzzy); } void @@ -69,13 +69,13 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, cassert(config_stats); shard_stats_out->retained += - ecache_npages_get(&shard->ecache_retained) << LG_PAGE; + ecache_npages_get(&shard->pac.ecache_retained) << LG_PAGE; shard_stats_out->edata_avail += atomic_load_zu( &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - resident_pgs += ecache_npages_get(&shard->ecache_dirty); + resident_pgs += ecache_npages_get(&shard->pac.ecache_dirty); *resident += (resident_pgs << LG_PAGE); /* Dirty decay stats */ @@ -112,12 +112,13 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = ecache_nextents_get(&shard->ecache_dirty, i); - muzzy = ecache_nextents_get(&shard->ecache_muzzy, i); - retained = ecache_nextents_get(&shard->ecache_retained, i); - dirty_bytes = ecache_nbytes_get(&shard->ecache_dirty, i); - muzzy_bytes = ecache_nbytes_get(&shard->ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&shard->ecache_retained, i); + dirty = ecache_nextents_get(&shard->pac.ecache_dirty, i); + muzzy = ecache_nextents_get(&shard->pac.ecache_muzzy, i); + retained = ecache_nextents_get(&shard->pac.ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, + i); extent_stats_out[i].ndirty = dirty; extent_stats_out[i].nmuzzy = muzzy; @@ -142,11 
+143,11 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->edata_cache.mtx, arena_prof_mutex_extent_avail); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_dirty.mtx, arena_prof_mutex_extents_dirty); + &shard->pac.ecache_dirty.mtx, arena_prof_mutex_extents_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); + &shard->pac.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_retained.mtx, arena_prof_mutex_extents_retained); + &shard->pac.ecache_retained.mtx, arena_prof_mutex_extents_retained); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, diff --git a/test/unit/pa.c b/test/unit/pa.c index f7b7290..8846f61 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -90,7 +90,7 @@ do_alloc_free_purge(void *arg) { pa_decay_all(TSDN_NULL, &test_data->shard, &test_data->shard.decay_dirty, &test_data->stats.decay_dirty, - &test_data->shard.ecache_dirty, true); + &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); } -- cgit v0.12 From 722652222a159c10f616d61b6dc145d07f84e025 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:14:16 -0700 Subject: PAC: Move in edata_cache accesses. --- include/jemalloc/internal/pac.h | 2 ++ src/extent.c | 26 +++++++++++++------------- src/pa.c | 1 + src/pa_extra.c | 1 - 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 73e672f..bd1c856 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -20,6 +20,8 @@ struct pac_s { ecache_t ecache_dirty; ecache_t ecache_muzzy; ecache_t ecache_retained; + + edata_cache_t *edata_cache; }; #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 3d827b8..4810a61 100644 --- a/src/extent.c +++ b/src/extent.c @@ -210,7 +210,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } static void @@ -632,7 +632,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, shard->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); if (edata == NULL) { goto label_err; } @@ -643,7 +643,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); goto label_err; } @@ -653,7 +653,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); goto label_err; } @@ -781,7 +781,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *edata = edata_cache_get(tsdn, 
shard->pac.edata_cache); if (edata == NULL) { return NULL; } @@ -789,7 +789,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return NULL; } edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, @@ -797,7 +797,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return NULL; } @@ -1000,7 +1000,7 @@ extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, shard, ehooks, edata); @@ -1023,7 +1023,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } return err; @@ -1093,7 +1093,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } static bool @@ -1177,7 +1177,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *trail = edata_cache_get(tsdn, shard->pac.edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1214,7 +1214,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, label_error_c: emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, &shard->edata_cache, trail); + edata_cache_put(tsdn, shard->pac.edata_cache, trail); label_error_a: return NULL; } @@ -1262,7 +1262,7 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, emap_merge_commit(tsdn, shard->emap, &prepare, a, b); emap_unlock_edata2(tsdn, shard->emap, a, b); - edata_cache_put(tsdn, &shard->edata_cache, b); + edata_cache_put(tsdn, shard->pac.edata_cache, b); return false; } diff --git a/src/pa.c b/src/pa.c index 27fc9ee..f37337d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -57,6 +57,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } + shard->pac.edata_cache = &shard->edata_cache; if (ecache_grow_init(tsdn, &shard->ecache_grow)) { return true; diff --git a/src/pa_extra.c b/src/pa_extra.c index 70ef19b..caa94d8 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -26,7 +26,6 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { ecache_prefork(tsdn, &shard->pac.ecache_retained); } - void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_prefork(tsdn, &shard->edata_cache); -- cgit v0.12 From 7efcb946c4707f12728e38f82fae1344591b9757 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:32:37 -0700 Subject: PAC: Add an init function. 
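
A minimal, self-contained sketch of the embedded-interface pattern that the PAI commit above introduces and that this PAC stack builds on: pa_shard_t embeds a pai_t by value (ecache_pai), and the ecache_pai_* callbacks recover the shard with offsetof arithmetic. The toy_* names below are invented for illustration and assume only standard C; they are not part of jemalloc.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A stripped-down stand-in for pai_t: a table of function pointers that is
 * embedded, by value, in whichever allocator implements it. */
typedef struct toy_pai_s toy_pai_t;
struct toy_pai_s {
    void *(*alloc)(toy_pai_t *self, size_t size);
};

static inline void *
toy_pai_alloc(toy_pai_t *self, size_t size) {
    return self->alloc(self, size);
}

/* The implementer embeds the interface and recovers its own pointer from the
 * interface pointer, mirroring ecache_pai_alloc()'s offsetof arithmetic. */
typedef struct toy_shard_s {
    int id;
    toy_pai_t ecache_pai;
} toy_shard_t;

static void *
toy_shard_alloc(toy_pai_t *self, size_t size) {
    toy_shard_t *shard = (toy_shard_t *)((uintptr_t)self -
        offsetof(toy_shard_t, ecache_pai));
    printf("shard %d: alloc of %zu bytes requested\n", shard->id, size);
    return NULL;    /* Real allocation elided in this sketch. */
}

int
main(void) {
    toy_shard_t shard = {0};
    shard.id = 1;
    shard.ecache_pai.alloc = &toy_shard_alloc;
    /* Callers only see the interface, just as pa_alloc() only touches
     * shard->ecache_pai via pai_alloc(). */
    (void)toy_pai_alloc(&shard.ecache_pai, 4096);
    return 0;
}

Adding a second page allocator implementation, as the "we'll soon grow another" comment above hints, then only requires another struct that embeds the interface and fills in the same four slots.
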
--- Makefile.in | 1 + include/jemalloc/internal/pac.h | 3 ++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/pa.c | 31 ++--------------- src/pac.c | 39 ++++++++++++++++++++++ 8 files changed, 53 insertions(+), 29 deletions(-) create mode 100644 src/pac.c diff --git a/Makefile.in b/Makefile.in index 7f07d96..2802f7f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -127,6 +127,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ + $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index bd1c856..5eb1e80 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -24,4 +24,7 @@ struct pac_s { edata_cache_t *edata_cache; }; +bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, + edata_cache_t *edata_cache); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 00ea2be..fe14779 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 0bcb45a..4b7b6ba 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 446ea60..6bd43c7 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 0bcb45a..4b7b6ba 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/src/pa.c b/src/pa.c index f37337d..f8fa922 100644 --- a/src/pa.c +++ b/src/pa.c @@ -26,39 +26,12 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); - /* - * Delay coalescing for dirty extents despite the disruptive effect on - * memory layout for best-fit extent allocation, since cached extents - * are likely to be reused soon after deallocation, and the cost of - * merging/splitting extents is non-trivial. - */ - if (ecache_init(tsdn, &shard->pac.ecache_dirty, extent_state_dirty, ind, - /* delay_coalesce */ true)) { - return true; - } - /* - * Coalesce muzzy extents immediately, because operations on them are in - * the critical path much less often than for dirty extents. 
- */ - if (ecache_init(tsdn, &shard->pac.ecache_muzzy, extent_state_muzzy, ind, - /* delay_coalesce */ false)) { - return true; - } - /* - * Coalesce retained extents immediately, in part because they will - * never be evicted (and therefore there's no opportunity for delayed - * coalescing), but also because operations on retained extents are not - * in the critical path. - */ - if (ecache_init(tsdn, &shard->pac.ecache_retained, extent_state_retained, - ind, /* delay_coalesce */ false)) { + if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (edata_cache_init(&shard->edata_cache, base)) { + if (pac_init(tsdn, &shard->pac, ind, &shard->edata_cache)) { return true; } - shard->pac.edata_cache = &shard->edata_cache; - if (ecache_grow_init(tsdn, &shard->ecache_grow)) { return true; } diff --git a/src/pac.c b/src/pac.c new file mode 100644 index 0000000..746bd4c --- /dev/null +++ b/src/pac.c @@ -0,0 +1,39 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/pac.h" + +bool +pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { + /* + * Delay coalescing for dirty extents despite the disruptive effect on + * memory layout for best-fit extent allocation, since cached extents + * are likely to be reused soon after deallocation, and the cost of + * merging/splitting extents is non-trivial. + */ + if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind, + /* delay_coalesce */ true)) { + return true; + } + /* + * Coalesce muzzy extents immediately, because operations on them are in + * the critical path much less often than for dirty extents. + */ + if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind, + /* delay_coalesce */ false)) { + return true; + } + /* + * Coalesce retained extents immediately, in part because they will + * never be evicted (and therefore there's no opportunity for delayed + * coalescing), but also because operations on retained extents are not + * in the critical path. + */ + if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, + ind, /* delay_coalesce */ false)) { + return true; + } + + pac->edata_cache = edata_cache; + return false; +} -- cgit v0.12 From 65803171a7f441f567b5d7e3809df22bda871d62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:47:04 -0700 Subject: PAC: move in emap --- include/jemalloc/internal/pac.h | 3 ++- src/extent.c | 50 ++++++++++++++++++++--------------------- src/pa.c | 2 +- src/pac.c | 4 +++- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 5eb1e80..8a89b6d 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -21,10 +21,11 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; + emap_t *emap; edata_cache_t *edata_cache; }; -bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, +bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 4810a61..269bc7c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -282,15 +282,15 @@ extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - emap_lock_edata(tsdn, shard->emap, edata); + emap_lock_edata(tsdn, shard->pac.emap, edata); - if (emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, + if (emap_register_boundary(tsdn, shard->pac.emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, shard->emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); return true; } - emap_unlock_edata(tsdn, shard->emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -321,9 +321,9 @@ extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { static void extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, shard->emap, edata); - emap_deregister_boundary(tsdn, shard->emap, edata); - emap_unlock_edata(tsdn, shard->emap, edata); + emap_lock_edata(tsdn, shard->pac.emap, edata); + emap_deregister_boundary(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -371,8 +371,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, shard->emap, new_addr, - false); + edata = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + new_addr, false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -386,7 +386,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, shard->emap, unlock_edata); + emap_unlock_edata(tsdn, shard->pac.emap, unlock_edata); } } else { /* @@ -545,7 +545,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, shard->emap, + assert(emap_lock_edata_from_addr(tsdn, shard->pac.emap, leak, false) == NULL); } return NULL; @@ -863,7 +863,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, shard->emap, + edata_t *next = emap_lock_edata_from_addr(tsdn, shard->pac.emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -874,7 +874,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, shard->emap, next); + emap_unlock_edata(tsdn, shard->pac.emap, next); if (can_coalesce && !extent_coalesce(tsdn, shard, ehooks, ecache, edata, next, true, @@ -889,12 +889,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } /* Try to coalesce backward. 
*/ - edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->emap, + edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->pac.emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, shard->emap, prev); + emap_unlock_edata(tsdn, shard->pac.emap, prev); if (can_coalesce && !extent_coalesce(tsdn, shard, ehooks, ecache, edata, prev, false, @@ -966,7 +966,7 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - emap_assert_mapped(tsdn, shard->emap, edata); + emap_assert_mapped(tsdn, shard->pac.emap, edata); if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, @@ -1189,13 +1189,13 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, + bool err = emap_split_prepare(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, shard->emap, edata, trail); + emap_lock_edata2(tsdn, shard->pac.emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1205,14 +1205,14 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, trail, + emap_split_commit(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, shard->emap, edata, trail); + emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, shard->emap, edata, trail); + emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); label_error_b: edata_cache_put(tsdn, shard->pac.edata_cache, trail); label_error_a: @@ -1250,17 +1250,17 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, shard->emap, &prepare, a, b); + emap_merge_prepare(tsdn, shard->pac.emap, &prepare, a, b); - emap_lock_edata2(tsdn, shard->emap, a, b); + emap_lock_edata2(tsdn, shard->pac.emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, shard->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, shard->emap, a, b); + emap_merge_commit(tsdn, shard->pac.emap, &prepare, a, b); + emap_unlock_edata2(tsdn, shard->pac.emap, a, b); edata_cache_put(tsdn, shard->pac.edata_cache, b); diff --git a/src/pa.c b/src/pa.c index f8fa922..9d35dd5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,7 +29,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, &shard->edata_cache)) { + if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { return true; } if (ecache_grow_init(tsdn, &shard->ecache_grow)) { diff --git a/src/pac.c b/src/pac.c index 746bd4c..7df5b02 100644 --- a/src/pac.c +++ b/src/pac.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/pac.h" bool -pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { +pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, + edata_cache_t *edata_cache) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -34,6 +35,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { return true; } + pac->emap = emap; pac->edata_cache = edata_cache; return false; } -- cgit v0.12 From c81e389996ef37c0d27b5a28bba0e04337d02a54 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 16:01:53 -0700 Subject: PAC: Move in ecache_grow. --- include/jemalloc/internal/pa.h | 2 -- include/jemalloc/internal/pac.h | 5 +++++ src/extent.c | 28 ++++++++++++++-------------- src/pa.c | 24 ++---------------------- src/pa_extra.c | 6 +++--- src/pac.c | 28 ++++++++++++++++++++++++++++ test/unit/retained.c | 2 +- 7 files changed, 53 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d7f2263..0b3e528 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -126,8 +126,6 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* The grow info for the retained ecache. */ - ecache_grow_t ecache_grow; /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 8a89b6d..3ad0097 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -23,9 +23,14 @@ struct pac_s { emap_t *emap; edata_cache_t *edata_cache; + + /* The grow info for the retained ecache. */ + ecache_grow_t ecache_grow; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache); +bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 269bc7c..ed90a15 100644 --- a/src/extent.c +++ b/src/extent.c @@ -608,7 +608,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &shard->pac.ecache_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ @@ -620,16 +620,16 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(shard->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(shard->pac.ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (shard->ecache_grow.next + egn_skip >= + if (shard->pac.ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } alloc_size = sz_pind2sz( - shard->ecache_grow.next + egn_skip); + shard->pac.ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); @@ -722,14 +722,14 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (shard->ecache_grow.next + egn_skip + 1 <= - shard->ecache_grow.limit) { - shard->ecache_grow.next += egn_skip + 1; + if (shard->pac.ecache_grow.next + egn_skip + 1 <= + shard->pac.ecache_grow.limit) { + shard->pac.ecache_grow.next += egn_skip + 1; } else { - shard->ecache_grow.next = shard->ecache_grow.limit; + shard->pac.ecache_grow.next = shard->pac.ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -743,7 +743,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); return NULL; } @@ -753,13 +753,13 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &shard->pac.ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, &shard->pac.ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -768,9 +768,9 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->pac.ecache_grow.mtx); return edata; } diff --git a/src/pa.c b/src/pa.c index 9d35dd5..98deba5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -32,9 +32,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { return true; } - if (ecache_grow_init(tsdn, &shard->ecache_grow)) { - return true; - } if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { return true; @@ -455,23 +452,6 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { - pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); - if (new_limit != NULL) { - size_t limit = *new_limit; - /* Grow no more than the new limit. 
*/ - if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { - return true; - } - } - - malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); - if (old_limit != NULL) { - *old_limit = sz_pind2sz(shard->ecache_grow.limit); - } - if (new_limit != NULL) { - shard->ecache_grow.limit = new_ind; - } - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); - - return false; + return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, + new_limit); } diff --git a/src/pa_extra.c b/src/pa_extra.c index caa94d8..a755781 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_grow_prefork(tsdn, &shard->ecache_grow); + ecache_grow_prefork(tsdn, &shard->pac.ecache_grow); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); + ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_child(tsdn, &shard->ecache_grow); + ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index 7df5b02..f30c4bb 100644 --- a/src/pac.c +++ b/src/pac.c @@ -34,8 +34,36 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } + if (ecache_grow_init(tsdn, &pac->ecache_grow)) { + return true; + } pac->emap = emap; pac->edata_cache = edata_cache; return false; } + +bool +pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit) { + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { + return true; + } + } + + malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(pac->ecache_grow.limit); + } + if (new_limit != NULL) { + pac->ecache_grow.limit = new_ind; + } + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + + return false; +} + diff --git a/test/unit/retained.c b/test/unit/retained.c index cf3de1e..ef301aa 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.ecache_grow.next; pind++) { + arena->pa_shard.pac.ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; -- cgit v0.12 From db211eefbfe2e35441dad0a7857e073ba4e8130e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 16:35:17 -0700 Subject: PAC: Move in decay. 
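To make the shape of this change concrete, a minimal self-contained sketch of the same ownership move follows, using simplified stand-in types (the toy_* names are illustrative only, not jemalloc's real structs): state that previously lived directly in the shard now lives in the embedded pac, and call sites reach it through shard->pac.

#include <stdio.h>

/* Simplified stand-ins; toy_* types are illustrative, not jemalloc's. */
typedef struct toy_decay_s {
	long decay_ms;
} toy_decay_t;

typedef struct toy_pac_s {
	/* After the move: the page allocator classic owns decay state. */
	toy_decay_t decay_dirty;
	toy_decay_t decay_muzzy;
} toy_pac_t;

typedef struct toy_pa_shard_s {
	/* The shard embeds a pac instead of holding decay fields itself. */
	toy_pac_t pac;
} toy_pa_shard_t;

/* Mirrors the style of the new pac_dirty_decay_ms_get() accessor. */
static long
toy_pac_dirty_decay_ms_get(const toy_pac_t *pac) {
	return pac->decay_dirty.decay_ms;
}

int
main(void) {
	toy_pa_shard_t shard = {
		.pac = { .decay_dirty = { 10000 }, .decay_muzzy = { 0 } }
	};
	/* Call sites change from shard.decay_dirty to shard.pac.decay_dirty. */
	printf("dirty decay: %ld ms\n",
	    toy_pac_dirty_decay_ms_get(&shard.pac));
	return 0;
}

The neighboring patches in this series ("Move in stats", "Move in extent_sn") follow the same recipe: add the field to pac_t, thread any constructor arguments through pac_init(), and repoint accessors and call sites at the pac.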
--- include/jemalloc/internal/arena_inlines_b.h | 3 --- .../jemalloc/internal/background_thread_inlines.h | 2 +- include/jemalloc/internal/pa.h | 25 +++------------------- include/jemalloc/internal/pac.h | 22 ++++++++++++++++++- src/arena.c | 12 +++++------ src/background_thread.c | 4 ++-- src/ctl.c | 4 ++-- src/pa.c | 12 +++-------- src/pa_extra.c | 16 +++++++------- src/pac.c | 9 +++++++- test/unit/pa.c | 7 +++--- 11 files changed, 58 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7351db9..335c079 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -131,9 +131,6 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_muzzy.mtx); - arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 7bdbe92..71b433c 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -55,7 +55,7 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, arena_background_thread_info_get(arena); if (background_thread_indefinite_sleep(info)) { background_thread_interval_check(tsdn, arena, - &arena->pa_shard.decay_dirty, 0); + &arena->pa_shard.pac.decay_dirty, 0); } } diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0b3e528..ca6482a 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -126,22 +126,12 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; - /* - * Decay-based purging state, responsible for scheduling extent state - * transitions. - * - * Synchronization: via the internal mutex. - */ - decay_t decay_dirty; /* dirty --> muzzy */ - decay_t decay_muzzy; /* muzzy --> retained */ - /* The emap this shard is tied to. */ emap_t *emap; @@ -149,25 +139,16 @@ struct pa_shard_s { base_t *base; }; -static inline ssize_t -pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { - return decay_ms_read(&shard->decay_dirty); -} -static inline ssize_t -pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { - return decay_ms_read(&shard->decay_muzzy); -} - static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pa_shard_muzzy_decay_ms_get(shard) <= 0; + pac_muzzy_decay_ms_get(&shard->pac) <= 0; } static inline bool pa_shard_may_force_decay(pa_shard_t *shard) { - return !(pa_shard_dirty_decay_ms_get(shard) == -1 - || pa_shard_muzzy_decay_ms_get(shard) == -1); + return !(pac_dirty_decay_ms_get(&shard->pac) == -1 + || pac_muzzy_decay_ms_get(&shard->pac) == -1); } static inline ehooks_t * diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 3ad0097..da14b62 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -26,11 +26,31 @@ struct pac_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. 
+ * + * Synchronization: via the internal mutex. + */ + decay_t decay_dirty; /* dirty --> muzzy */ + decay_t decay_muzzy; /* muzzy --> retained */ }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, - edata_cache_t *edata_cache); + edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +static inline ssize_t +pac_dirty_decay_ms_get(pac_t *pac) { + return decay_ms_read(&pac->decay_dirty); +} + +static inline ssize_t +pac_muzzy_decay_ms_get(pac_t *pac) { + return decay_ms_read(&pac->decay_muzzy); +} + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index fb9cb7b..9fa2db7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -397,12 +397,12 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return pa_shard_dirty_decay_ms_get(&arena->pa_shard); + return pac_dirty_decay_ms_get(&arena->pa_shard.pac); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); + return pac_muzzy_decay_ms_get(&arena->pa_shard.pac); } /* @@ -453,7 +453,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, decay_ms); } @@ -461,7 +461,7 @@ arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, decay_ms); } @@ -520,7 +520,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } @@ -531,7 +531,7 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } - return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } diff --git a/src/background_thread.c b/src/background_thread.c index 557dbc4..a36836c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -201,12 +201,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_dirty, &arena->pa_shard.pac.ecache_dirty); + &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); + &arena->pa_shard.pac.decay_muzzy, 
&arena->pa_shard.pac.ecache_muzzy); return i1 < i2 ? i1 : i2; } diff --git a/src/ctl.c b/src/ctl.c index 0098d93..56dcf82 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3130,8 +3130,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_retained.mtx); - MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); - MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.decay_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); diff --git a/src/pa.c b/src/pa.c index 98deba5..501d57c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,14 +29,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { - return true; - } - - if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { - return true; - } - if (decay_init(&shard->decay_muzzy, cur_time, muzzy_decay_ms)) { + if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, + cur_time, dirty_decay_ms, muzzy_decay_ms)) { return true; } @@ -91,7 +85,7 @@ pa_shard_extent_sn_next(pa_shard_t *shard) { static bool pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pa_shard_muzzy_decay_ms_get(shard) != 0; + return pac_muzzy_decay_ms_get(&shard->pac) != 0; } static edata_t * diff --git a/src/pa_extra.c b/src/pa_extra.c index a755781..ae5855a 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -10,8 +10,8 @@ void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { - malloc_mutex_prefork(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_prefork(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -38,8 +38,8 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -49,8 +49,8 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); - malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -148,7 +148,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.ecache_retained.mtx, arena_prof_mutex_extents_retained); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); + &shard->pac.decay_dirty.mtx, arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); + &shard->pac.decay_muzzy.mtx, 
arena_prof_mutex_decay_muzzy); } diff --git a/src/pac.c b/src/pac.c index f30c4bb..1e20d65 100644 --- a/src/pac.c +++ b/src/pac.c @@ -5,7 +5,8 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, - edata_cache_t *edata_cache) { + edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -37,6 +38,12 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, if (ecache_grow_init(tsdn, &pac->ecache_grow)) { return true; } + if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { + return true; + } + if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) { + return true; + } pac->emap = emap; pac->edata_cache = edata_cache; diff --git a/test/unit/pa.c b/test/unit/pa.c index 8846f61..7cd9fa1 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -86,13 +86,14 @@ do_alloc_free_purge(void *arg) { bool generated_dirty; pa_dalloc(TSDN_NULL, &test_data->shard, edata, &generated_dirty); - malloc_mutex_lock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); + malloc_mutex_lock(TSDN_NULL, + &test_data->shard.pac.decay_dirty.mtx); pa_decay_all(TSDN_NULL, &test_data->shard, - &test_data->shard.decay_dirty, + &test_data->shard.pac.decay_dirty, &test_data->stats.decay_dirty, &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, - &test_data->shard.decay_dirty.mtx); + &test_data->shard.pac.decay_dirty.mtx); } return NULL; } -- cgit v0.12 From 73913823491ef32a7ea1471de1ef185219e44d41 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 17:42:27 -0700 Subject: PA->PAC: Move in stats. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 2 +- include/jemalloc/internal/pa.h | 67 +++++-------------------------- include/jemalloc/internal/pac.h | 64 ++++++++++++++++++++++++++++- src/arena.c | 18 ++++----- src/ctl.c | 56 +++++++++++++------------- src/extent.c | 17 ++++---- src/pa.c | 19 ++++----- src/pa_extra.c | 46 ++++++++++----------- src/pac.c | 4 +- test/unit/pa.c | 2 +- 11 files changed, 156 insertions(+), 141 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 40dad71..e6e9a0b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -27,7 +27,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pa_extent_stats_t *estats); + pac_estats_t *estats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e0b46fa..fbc432b 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -44,7 +44,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - pa_extent_stats_t estats[SC_NPSIZES]; + pac_estats_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ca6482a..2891d7c 100644 --- a/include/jemalloc/internal/pa.h +++ 
b/include/jemalloc/internal/pa.h @@ -25,33 +25,6 @@ enum pa_decay_purge_setting_e { }; typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; -typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; -struct pa_shard_decay_stats_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. */ - locked_u64_t purged; -}; - -typedef struct pa_extent_stats_s pa_extent_stats_t; -struct pa_extent_stats_s { - /* - * Stats for a given index in the range [0, SC_NPSIZES] in the various - * ecache_ts. - * We track both bytes and # of extents: two extents in the same bucket - * may have different sizes if adjacent size classes differ by more than - * a page, so bytes cannot always be derived from # of extents. - */ - size_t ndirty; - size_t dirty_bytes; - size_t nmuzzy; - size_t muzzy_bytes; - size_t nretained; - size_t retained_bytes; -}; - /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -65,30 +38,15 @@ struct pa_extent_stats_s { */ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { - pa_shard_decay_stats_t decay_dirty; - pa_shard_decay_stats_t decay_muzzy; - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - size_t retained; /* Derived. */ - - /* - * Number of bytes currently mapped, excluding retained memory (and any - * base-allocated memory, which is tracked by the arena stats). - * - * We name this "pa_mapped" to avoid confusion with the arena_stats - * "mapped". - */ - atomic_zu_t pa_mapped; - /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. */ - - /* VM space had to be leaked (undocumented). Normally 0. */ - atomic_zu_t abandoned_vm; + /* + * Stats specific to the PAC. For now, these are the only stats that + * exist, but there will eventually be other page allocators. Things + * like edata_avail make sense in a cross-PA sense, but things like + * npurges don't. + */ + pac_stats_t pac_stats; }; /* @@ -208,14 +166,14 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, * concurrently with the call. */ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); /* * Updates decay settings for the current time, and conditionally purges in * response (depending on decay_purge_setting). Returns whether or not the * epoch advanced. 
*/ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); /* @@ -251,13 +209,8 @@ void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); -static inline size_t -pa_shard_pa_mapped(pa_shard_t *shard) { - return atomic_load_zu(&shard->stats->pa_mapped, ATOMIC_RELAXED); -} - void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, - pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, size_t *resident); /* diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index da14b62..14ee09f 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -9,6 +9,58 @@ * - Can use efficient OS-level zeroing primitives for demand-filled pages. */ +typedef struct pac_decay_stats_s pac_decay_stats_t; +struct pac_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. */ + locked_u64_t purged; +}; + +typedef struct pac_estats_s pac_estats_t; +struct pac_estats_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in the various + * ecache_ts. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. + */ + size_t ndirty; + size_t dirty_bytes; + size_t nmuzzy; + size_t muzzy_bytes; + size_t nretained; + size_t retained_bytes; +}; + +typedef struct pac_stats_s pac_stats_t; +struct pac_stats_s { + pac_decay_stats_t decay_dirty; + pac_decay_stats_t decay_muzzy; + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + size_t retained; /* Derived. */ + + /* + * Number of bytes currently mapped, excluding retained memory (and any + * base-allocated memory, which is tracked by the arena stats). + * + * We name this "pac_mapped" to avoid confusion with the arena_stats + * "mapped". + */ + atomic_zu_t pac_mapped; + + /* VM space had to be leaked (undocumented). Normally 0. 
*/ + atomic_zu_t abandoned_vm; +}; + typedef struct pac_s pac_t; struct pac_s { /* @@ -35,13 +87,18 @@ struct pac_s { */ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + + malloc_mutex_t *stats_mtx; + pac_stats_t *stats; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms); + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, + pac_estats_t *estats_out, size_t *resident); static inline ssize_t pac_dirty_decay_ms_get(pac_t *pac) { @@ -53,4 +110,9 @@ pac_muzzy_decay_ms_get(pac_t *pac) { return decay_ms_read(&pac->decay_muzzy); } +static inline size_t +pac_mapped(pac_t *pac) { + return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); +} + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 9fa2db7..619060f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -80,7 +80,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pa_extent_stats_t *estats) { + pac_estats_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -89,8 +89,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t pa_mapped = pa_shard_pa_mapped(&arena->pa_shard); - astats->mapped += base_mapped + pa_mapped; + size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac); + astats->mapped += base_mapped + pac_mapped_sz; astats->resident += base_resident; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); @@ -423,7 +423,7 @@ arena_decide_unforced_decay_purge_setting(bool is_background_thread) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { if (!decay_ms_valid(decay_ms)) { return true; } @@ -454,7 +454,7 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, decay_ms); } @@ -462,13 +462,13 @@ bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, decay_ms); } static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); @@ -521,7 +521,7 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, 
&arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } @@ -532,7 +532,7 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } diff --git a/src/ctl.c b/src/ctl.c index 56dcf82..8b4b764 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -831,7 +831,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(pa_extent_stats_t)); + sizeof(pac_estats_t)); } } @@ -889,32 +889,31 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->astats.mapped += astats->astats.mapped; - sdstats->astats.pa_shard_stats.retained - += astats->astats.pa_shard_stats.retained; + sdstats->astats.pa_shard_stats.pac_stats.retained + += astats->astats.pa_shard_stats.pac_stats.retained; sdstats->astats.pa_shard_stats.edata_avail += astats->astats.pa_shard_stats.edata_avail; } - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.npurge, - &astats->astats.pa_shard_stats.decay_dirty.npurge); + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.nmadvise, - &astats->astats.pa_shard_stats.decay_dirty.nmadvise); + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.purged, - &astats->astats.pa_shard_stats.decay_dirty.purged); + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.npurge, - &astats->astats.pa_shard_stats.decay_muzzy.npurge); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.nmadvise, - &astats->astats.pa_shard_stats.decay_muzzy.nmadvise); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.purged, - &astats->astats.pa_shard_stats.decay_muzzy.purged); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ &(sdstats->astats.mutex_prof_data[ \ @@ -957,8 +956,8 @@ MUTEX_PROF_ARENA_MUTEXES += astats->astats.nrequests_large; sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( - &sdstats->astats.pa_shard_stats.abandoned_vm, - &astats->astats.pa_shard_stats.abandoned_vm); + &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm, + &astats->astats.pa_shard_stats.pac_stats.abandoned_vm); sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; @@ -1117,8 +1116,8 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = 
ctl_sarena->astats->astats.mapped; - ctl_stats->retained = - ctl_sarena->astats->astats.pa_shard_stats.retained; + ctl_stats->retained = ctl_sarena->astats->astats + .pa_shard_stats.pac_stats.retained; ctl_background_thread_stats_read(tsdn); @@ -2976,35 +2975,34 @@ CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, - size_t) + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.npurge), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.nmadvise), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.purged), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.npurge), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.nmadvise), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.purged), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, @@ -3022,7 +3020,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.abandoned_vm, + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, diff --git a/src/extent.c b/src/extent.c index ed90a15..fb6cceb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -196,7 +196,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&shard->stats->abandoned_vm, sz, + atomic_fetch_add_zu(&shard->pac.stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -938,21 +938,20 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, shard, ehooks, edata); if (config_stats) { /* Update stats accordingly. 
*/ - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *shard->pac.stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*shard->pac.stats_mtx), + &shard->pac.stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.purged, + LOCKEDINT_MTX(*shard->pac.stats_mtx), + &shard->pac.stats->decay_dirty.purged, extent_size >> LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->pa_mapped, extent_size, + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->pac.stats_mtx); + atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } - /* * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces and inserts into the eset). diff --git a/src/pa.c b/src/pa.c index 501d57c..3ca8e35 100644 --- a/src/pa.c +++ b/src/pa.c @@ -30,7 +30,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return true; } if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, - cur_time, dirty_decay_ms, muzzy_decay_ms)) { + cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, + stats_mtx)) { return true; } @@ -106,7 +107,7 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, size, + atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, ATOMIC_RELAXED); } } @@ -170,7 +171,7 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, + atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, mapped_add, ATOMIC_RELAXED); } return false; @@ -288,7 +289,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_inactive_t *decay_extents) { bool err; @@ -343,7 +344,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->pa_mapped, + atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, nunmapped << LG_PAGE, ATOMIC_RELAXED); } @@ -359,7 +360,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, */ static void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -386,7 +387,7 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { malloc_mutex_assert_owner(tsdn, &decay->mtx); pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, /* 
npages_limit */ 0, ecache_npages_get(ecache)); @@ -394,7 +395,7 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, static void pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, size_t npages_limit) { if (current_npages > npages_limit) { pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, @@ -405,7 +406,7 @@ pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting) { malloc_mutex_assert_owner(tsdn, &decay->mtx); diff --git a/src/pa_extra.c b/src/pa_extra.c index ae5855a..26a196b 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -63,13 +63,13 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, - pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, size_t *resident) { cassert(config_stats); - shard_stats_out->retained += + pa_shard_stats_out->pac_stats.retained += ecache_npages_get(&shard->pac.ecache_retained) << LG_PAGE; - shard_stats_out->edata_avail += atomic_load_zu( + pa_shard_stats_out->edata_avail += atomic_load_zu( &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; @@ -79,34 +79,34 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, /* Dirty decay stats */ locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.npurge, + &pa_shard_stats_out->pac_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.npurge)); + &shard->pac.stats->decay_dirty.npurge)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.nmadvise, + &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.nmadvise)); + &shard->pac.stats->decay_dirty.nmadvise)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.purged, + &pa_shard_stats_out->pac_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.purged)); + &shard->pac.stats->decay_dirty.purged)); /* Muzzy decay stats */ locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.npurge, + &pa_shard_stats_out->pac_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.npurge)); + &shard->pac.stats->decay_muzzy.npurge)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.nmadvise, + &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.nmadvise)); + &shard->pac.stats->decay_muzzy.nmadvise)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.purged, + &pa_shard_stats_out->pac_stats.decay_muzzy.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.purged)); + &shard->pac.stats->decay_muzzy.purged)); - atomic_load_add_store_zu(&shard_stats_out->abandoned_vm, - atomic_load_zu(&shard->stats->abandoned_vm, ATOMIC_RELAXED)); + atomic_load_add_store_zu(&pa_shard_stats_out->pac_stats.abandoned_vm, + atomic_load_zu(&shard->pac.stats->abandoned_vm, 
ATOMIC_RELAXED)); for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, @@ -119,12 +119,12 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, i); - extent_stats_out[i].ndirty = dirty; - extent_stats_out[i].nmuzzy = muzzy; - extent_stats_out[i].nretained = retained; - extent_stats_out[i].dirty_bytes = dirty_bytes; - extent_stats_out[i].muzzy_bytes = muzzy_bytes; - extent_stats_out[i].retained_bytes = retained_bytes; + estats_out[i].ndirty = dirty; + estats_out[i].nmuzzy = muzzy; + estats_out[i].nretained = retained; + estats_out[i].dirty_bytes = dirty_bytes; + estats_out[i].muzzy_bytes = muzzy_bytes; + estats_out[i].retained_bytes = retained_bytes; } } diff --git a/src/pac.c b/src/pac.c index 1e20d65..9192f54 100644 --- a/src/pac.c +++ b/src/pac.c @@ -6,7 +6,7 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms) { + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -47,6 +47,8 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, pac->emap = emap; pac->edata_cache = edata_cache; + pac->stats = pac_stats; + pac->stats_mtx = stats_mtx; return false; } diff --git a/test/unit/pa.c b/test/unit/pa.c index 7cd9fa1..17889b5 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -90,7 +90,7 @@ do_alloc_free_purge(void *arg) { &test_data->shard.pac.decay_dirty.mtx); pa_decay_all(TSDN_NULL, &test_data->shard, &test_data->shard.pac.decay_dirty, - &test_data->stats.decay_dirty, + &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); -- cgit v0.12 From dee5d1c42de6e0908e1ee8e3c4c89cffcbee72ff Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 18:01:19 -0700 Subject: PA->PAC: Move in extent_sn. --- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/pa.h | 5 ----- include/jemalloc/internal/pac.h | 3 +++ src/extent.c | 9 +++++++-- src/extent_dss.c | 7 ++++--- src/pa.c | 6 ------ src/pac.c | 1 + 7 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 2f14b81..2eb53f6 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,7 +48,7 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b); - +size_t extent_sn_next(pac_t *pac); bool extent_boot(void); #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 2891d7c..e6ed1fd 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -84,9 +84,6 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* Extent serial number generator state. 
*/ - atomic_zu_t extent_sn_next; - malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; @@ -131,8 +128,6 @@ void pa_shard_reset(pa_shard_t *shard); */ void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); -size_t pa_shard_extent_sn_next(pa_shard_t *shard); - /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 14ee09f..d1d6853 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -90,6 +90,9 @@ struct pac_s { malloc_mutex_t *stats_mtx; pac_stats_t *stats; + + /* Extent serial number generator state. */ + atomic_zu_t extent_sn_next; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, diff --git a/src/extent.c b/src/extent.c index fb6cceb..bb5daba 100644 --- a/src/extent.c +++ b/src/extent.c @@ -50,6 +50,11 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, /******************************************************************************/ +size_t +extent_sn_next(pac_t *pac) { + return atomic_fetch_add_zu(&pac->extent_sn_next, 1, ATOMIC_RELAXED); +} + static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -648,7 +653,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), + alloc_size, false, SC_NSIZES, extent_sn_next(&shard->pac), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); @@ -793,7 +798,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), + size, /* slab */ false, SC_NSIZES, extent_sn_next(&shard->pac), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { diff --git a/src/extent_dss.c b/src/extent_dss.c index 81161b3..dff231d 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -154,9 +154,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, pa_shard_extent_sn_next( - &arena->pa_shard), extent_state_active, - false, true, false, EXTENT_NOT_HEAD); + SC_NSIZES, extent_sn_next( + &arena->pa_shard.pac), + extent_state_active, false, true, false, + EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired diff --git a/src/pa.c b/src/pa.c index 3ca8e35..2f970c7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -35,7 +35,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return true; } - atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; @@ -79,11 +78,6 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { } } -size_t -pa_shard_extent_sn_next(pa_shard_t *shard) { - return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); -} - static bool pa_shard_may_have_muzzy(pa_shard_t *shard) { return pac_muzzy_decay_ms_get(&shard->pac) != 0; diff --git a/src/pac.c b/src/pac.c index 9192f54..8ff6f1c 100644 --- a/src/pac.c +++ b/src/pac.c @@ -49,6 
+49,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, pac->edata_cache = edata_cache; pac->stats = pac_stats; pac->stats_mtx = stats_mtx; + atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); return false; } -- cgit v0.12 From 72435b0aba3e121d598be10e865f43d9491c71e2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 18:49:42 -0700 Subject: PA->PAC: Make extent.c forget about PA. --- include/jemalloc/internal/extent.h | 20 +- include/jemalloc/internal/pa.h | 6 - src/extent.c | 366 ++++++++++++++++++------------------- src/extent_dss.c | 2 +- src/pa.c | 34 ++-- 5 files changed, 211 insertions(+), 217 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 2eb53f6..f620736 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,22 +19,22 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); -edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); -void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); @@ -44,9 +44,9 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, +edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); -bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); size_t extent_sn_next(pac_t *pac); bool extent_boot(void); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e6ed1fd..9482380 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -100,12 +100,6 @@ pa_shard_dont_decay_muzzy(pa_shard_t *shard) { pac_muzzy_decay_ms_get(&shard->pac) <= 0; } -static 
inline bool -pa_shard_may_force_decay(pa_shard_t *shard) { - return !(pac_dirty_decay_ms_get(&shard->pac) == -1 - || pac_muzzy_decay_ms_get(&shard->pac) == -1); -} - static inline ehooks_t * pa_shard_ehooks_get(pa_shard_t *shard) { return base_ehooks_get(shard->base); diff --git a/src/extent.c b/src/extent.c index bb5daba..87d6a9a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,10 +19,9 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, - bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ @@ -35,16 +34,15 @@ static atomic_zu_t highpages; * definition. */ -static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); -static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained); -static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); +static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool zero, + bool *commit, bool growing_retained); +static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); -static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, +static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -55,12 +53,18 @@ extent_sn_next(pac_t *pac) { return atomic_fetch_add_zu(&pac->extent_sn_next, 1, ATOMIC_RELAXED); } +static inline bool +extent_may_force_decay(pac_t *pac) { + return !(pac_dirty_decay_ms_get(pac) == -1 + || pac_muzzy_decay_ms_get(pac) == -1); +} + static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); @@ -72,32 +76,30 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, } edata_t * -ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void 
*new_addr, size_t size, size_t alignment, - bool zero) { +ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, zero, &commit, false); + edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, + size, alignment, zero, &commit, false); assert(edata == NULL || !edata_ranged_get(edata)); return edata; } edata_t * -ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, - bool zero) { +ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { @@ -109,7 +111,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, + edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); } @@ -118,8 +120,8 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } void -ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata) { +ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(!edata_ranged_get(edata)); @@ -129,11 +131,11 @@ ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, shard, ehooks, ecache, edata, false); + extent_record(tsdn, pac, ehooks, ecache, edata, false); } edata_t * -ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); @@ -159,7 +161,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, shard, ehooks, ecache, + if (extent_try_delayed_coalesce(tsdn, pac, ehooks, ecache, edata)) { break; } @@ -181,7 +183,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, shard, edata); + extent_deregister(tsdn, pac, edata); break; default: not_reached(); @@ -197,11 +199,11 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. 
*/ static void -extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&shard->pac.stats->abandoned_vm, sz, + atomic_fetch_add_zu(&pac->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -215,7 +217,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } static void @@ -281,21 +283,20 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { } static bool -extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. */ - emap_lock_edata(tsdn, shard->pac.emap, edata); + emap_lock_edata(tsdn, pac->emap, edata); - if (emap_register_boundary(tsdn, shard->pac.emap, edata, SC_NSIZES, + if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); return true; } - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -305,18 +306,18 @@ extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } static bool -extent_register(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - return extent_register_impl(tsdn, shard, edata, true); +extent_register(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + return extent_register_impl(tsdn, pac, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - return extent_register_impl(tsdn, shard, edata, false); +extent_register_no_gdump_add(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + return extent_register_impl(tsdn, pac, edata, false); } static void -extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - bool err = extent_register(tsdn, shard, edata); +extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + bool err = extent_register(tsdn, pac, edata); assert(!err); } @@ -324,11 +325,11 @@ extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. 
*/ static void -extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, +extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, shard->pac.emap, edata); - emap_deregister_boundary(tsdn, shard->pac.emap, edata); - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_lock_edata(tsdn, pac->emap, edata); + emap_deregister_boundary(tsdn, pac->emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -336,14 +337,14 @@ extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } static void -extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - extent_deregister_impl(tsdn, shard, edata, true); +extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + extent_deregister_impl(tsdn, pac, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { - extent_deregister_impl(tsdn, shard, edata, false); + extent_deregister_impl(tsdn, pac, edata, false); } /* @@ -351,7 +352,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, * given allocation request. */ static edata_t * -extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -376,8 +377,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, shard->pac.emap, - new_addr, false); + edata = emap_lock_edata_from_addr(tsdn, pac->emap, new_addr, + false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -391,7 +392,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, shard->pac.emap, unlock_edata); + emap_unlock_edata(tsdn, pac->emap, unlock_edata); } } else { /* @@ -451,7 +452,7 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -473,7 +474,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, + *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, size + trailsize, growing_retained); if (*edata == NULL) { *to_leak = *lead; @@ -484,7 +485,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, + *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, trailsize, growing_retained); if (*trail == NULL) { *to_leak = *edata; @@ -505,7 +506,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * and put back into ecache. 
*/ static edata_t * -extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -514,7 +515,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( - tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -543,14 +544,14 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ assert(result == extent_split_interior_error); if (to_salvage != NULL) { - extent_deregister(tsdn, shard, to_salvage); + extent_deregister(tsdn, pac, to_salvage); } if (to_leak != NULL) { void *leak = edata_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, shard, to_leak); - extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, + extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, shard->pac.emap, + assert(emap_lock_edata_from_addr(tsdn, pac->emap, leak, false) == NULL); } return NULL; @@ -563,18 +564,18 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * in the given ecache_t. */ static edata_t * -extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero, - bool *commit, bool growing_retained) { +extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, + edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, new_addr, size, alignment, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, pac, ehooks, ecache, new_addr, size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; @@ -583,7 +584,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, shard, ehooks, ecache, edata, + extent_record(tsdn, pac, ehooks, ecache, edata, growing_retained); return NULL; } @@ -611,9 +612,9 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * virtual memory ranges retained by each shard. */ static edata_t * -extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->ecache_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -625,19 +626,18 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * satisfy this request. 
*/ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(shard->pac.ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (shard->pac.ecache_grow.next + egn_skip >= + if (pac->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz( - shard->pac.ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); if (edata == NULL) { goto label_err; } @@ -648,17 +648,17 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); goto label_err; } - edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, extent_sn_next(&shard->pac), + edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, + alloc_size, false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); - if (extent_register_no_gdump_add(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register_no_gdump_add(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); goto label_err; } @@ -672,17 +672,17 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, alignment, /* growing_retained */ true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, lead, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + lead, true); } if (trail != NULL) { - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, trail, true); + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, trail, true); } } else { /* @@ -694,13 +694,13 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, to_salvage, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + to_salvage, true); } if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, shard, to_leak); - extents_abandon_vm(tsdn, shard, ehooks, - &shard->pac.ecache_retained, to_leak, true); + extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + extents_abandon_vm(tsdn, pac, ehooks, + &pac->ecache_retained, to_leak, true); } goto label_err; } @@ -708,8 +708,8 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, edata, true); + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. 
*/ @@ -727,14 +727,13 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (shard->pac.ecache_grow.next + egn_skip + 1 <= - shard->pac.ecache_grow.limit) { - shard->pac.ecache_grow.next += egn_skip + 1; + if (pac->ecache_grow.next + egn_skip + 1 <= pac->ecache_grow.limit) { + pac->ecache_grow.next += egn_skip + 1; } else { - shard->pac.ecache_grow.next = shard->pac.ecache_grow.limit; + pac->ecache_grow.next = pac->ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -748,45 +747,45 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); return NULL; } static edata_t * -extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); - edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->pac.ecache_retained, new_addr, size, alignment, zero, + edata_t *edata = extent_recycle(tsdn, pac, ehooks, + &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, shard, ehooks, size, + edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->ecache_grow.mtx); return edata; } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); if (edata == NULL) { return NULL; } @@ -794,15 +793,15 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(&shard->pac), + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, + size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); - if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } @@ -831,15 +830,14 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, shard, ehooks, + bool err = extent_merge_impl(tsdn, pac, ehooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -851,7 +849,7 @@ extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -868,7 +866,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, again = false; /* Try to coalesce forward. 
*/ - edata_t *next = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + edata_t *next = emap_lock_edata_from_addr(tsdn, pac->emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -879,9 +877,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, shard->pac.emap, next); + emap_unlock_edata(tsdn, pac->emap, next); - if (can_coalesce && !extent_coalesce(tsdn, shard, + if (can_coalesce && !extent_coalesce(tsdn, pac, ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { @@ -894,14 +892,14 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } /* Try to coalesce backward. */ - edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + edata_t *prev = emap_lock_edata_from_addr(tsdn, pac->emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, shard->pac.emap, prev); + emap_unlock_edata(tsdn, pac->emap, prev); - if (can_coalesce && !extent_coalesce(tsdn, shard, + if (can_coalesce && !extent_coalesce(tsdn, pac, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; @@ -922,37 +920,37 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, coalesced, growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ static void -extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, *shard->pac.stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->pac.stats_mtx), - &shard->pac.stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->pac.stats_mtx), - &shard->pac.stats->decay_dirty.purged, + LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.purged, extent_size >> LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->pac.stats_mtx); - atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, extent_size, + LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); + atomic_fetch_sub_zu(&pac->stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } @@ -962,7 +960,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * given ecache_t (coalesces and inserts into the eset). 
*/ static void -extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || @@ -970,25 +968,25 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - emap_assert_mapped(tsdn, shard->pac.emap, edata); + emap_assert_mapped(tsdn, pac->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &shard->pac.ecache_dirty); + assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, shard, ehooks, + edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - pa_shard_may_force_decay(shard)) { + extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); - extent_maximally_purge(tsdn, shard, ehooks, edata); + extent_maximally_purge(tsdn, pac, ehooks, edata); return; } } @@ -998,20 +996,20 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } void -extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); return; } - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { bool err; @@ -1027,14 +1025,14 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } return err; } void -extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1046,11 +1044,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ - extent_deregister(tsdn, shard, edata); - if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { + extent_deregister(tsdn, pac, edata); + if (!extent_dalloc_wrapper_try(tsdn, pac, ehooks, edata)) { return; } - extent_reregister(tsdn, shard, edata); + extent_reregister(tsdn, pac, edata); } /* Try to decommit; purge if that fails. 
*/ @@ -1076,12 +1074,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, shard, ehooks, &shard->pac.ecache_retained, edata, + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata, false); } void -extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -1089,7 +1087,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, shard, edata); + extent_deregister(tsdn, pac, edata); edata_addr_set(edata, edata_base_get(edata)); @@ -1097,7 +1095,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } static bool @@ -1171,7 +1169,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1181,7 +1179,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *trail = edata_cache_get(tsdn, pac->edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1193,13 +1191,13 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, shard->pac.emap, &prepare, edata, + bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_lock_edata2(tsdn, pac->emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1209,29 +1207,29 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, + emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_unlock_edata2(tsdn, pac->emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_unlock_edata2(tsdn, pac->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, shard->pac.edata_cache, trail); + edata_cache_put(tsdn, pac->edata_cache, trail); label_error_a: return NULL; } edata_t * -extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b) { - return extent_split_impl(tsdn, shard, ehooks, edata, size_a, size_b, +extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, + size_t size_a, size_t size_b) { + return 
extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, /* growing_retained */ false); } static bool -extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, +extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1254,27 +1252,27 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, shard->pac.emap, &prepare, a, b); + emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - emap_lock_edata2(tsdn, shard->pac.emap, a, b); + emap_lock_edata2(tsdn, pac->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, shard->pac.emap, &prepare, a, b); - emap_unlock_edata2(tsdn, shard->pac.emap, a, b); + emap_merge_commit(tsdn, pac->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, pac->emap, a, b); - edata_cache_put(tsdn, shard->pac.edata_cache, b); + edata_cache_put(tsdn, pac->edata_cache, b); return false; } bool -extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, shard, ehooks, a, b, false); + return extent_merge_impl(tsdn, pac, ehooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index dff231d..7427cd8 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -189,7 +189,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, ehooks_t *ehooks = arena_get_ehooks( arena); extent_dalloc_gap(tsdn, - &arena->pa_shard, ehooks, gap); + &arena->pa_shard.pac, ehooks, gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); diff --git a/src/pa.c b/src/pa.c index 2f970c7..e8c88a0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -72,9 +72,9 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { */ ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata; - while ((edata = ecache_evict(tsdn, shard, ehooks, + while ((edata = ecache_evict(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, shard, ehooks, edata); + extent_destroy_wrapper(tsdn, &shard->pac, ehooks, edata); } } @@ -90,15 +90,15 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata = ecache_alloc(tsdn, shard, ehooks, + edata_t *edata = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, shard, ehooks, + edata = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, NULL, size, alignment, zero); } if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, shard, ehooks, + edata = ecache_alloc_grow(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, @@ -144,15 +144,15 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if 
(ehooks_merge_will_fail(ehooks)) { return true; } - edata_t *trail = ecache_alloc(tsdn, shard, ehooks, + edata_t *trail = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { - trail = ecache_alloc(tsdn, shard, ehooks, + trail = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, zero); } if (trail == NULL) { - trail = ecache_alloc_grow(tsdn, shard, ehooks, + trail = ecache_alloc_grow(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, trail_begin, expand_amount, PAGE, zero); mapped_add = expand_amount; @@ -160,8 +160,8 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, shard, ehooks, edata, trail)) { - extent_dalloc_wrapper(tsdn, shard, ehooks, trail); + if (extent_merge_wrapper(tsdn, &shard->pac, ehooks, edata, trail)) { + extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, trail); return true; } if (config_stats && mapped_add > 0) { @@ -206,12 +206,13 @@ ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, + edata_t *trail = extent_split_wrapper(tsdn, &shard->pac, ehooks, edata, new_size, shrink_amount); if (trail == NULL) { return true; } - ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, trail); + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, + trail); return false; } @@ -242,7 +243,8 @@ ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { pa_shard_t *shard = (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, edata); + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, + edata); } void @@ -270,7 +272,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, /* Stash extents according to npages_limit. */ size_t nstashed = 0; while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, shard, ehooks, ecache, + edata_t *edata = ecache_evict(tsdn, &shard->pac, ehooks, ecache, npages_limit); if (edata == NULL) { break; @@ -313,14 +315,14 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, err = extent_purge_lazy_wrapper(tsdn, ehooks, edata, /* offset */ 0, size); if (!err) { - ecache_dalloc(tsdn, shard, ehooks, + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, edata); break; } } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, edata); nunmapped += npages; break; case extent_state_retained: -- cgit v0.12 From 4ee75be3a3d549619930cf07b5bc8a3809eab008 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 2 Jun 2020 12:45:39 -0700 Subject: PA -> PAC: Move in decay_purge enum. --- include/jemalloc/internal/pa.h | 9 +-------- include/jemalloc/internal/pac.h | 8 ++++++++ src/arena.c | 12 ++++++------ src/pa.c | 6 +++--- src/pac.c | 1 - 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9482380..e5a46f9 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -18,13 +18,6 @@ * others will be coming soon. 
*/ -enum pa_decay_purge_setting_e { - PA_DECAY_PURGE_ALWAYS, - PA_DECAY_PURGE_NEVER, - PA_DECAY_PURGE_ON_EPOCH_ADVANCE -}; -typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; - /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -163,7 +156,7 @@ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, */ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pa_decay_purge_setting_t decay_purge_setting); + pac_decay_purge_setting_t decay_purge_setting); /* * Gets / sets the maximum amount that we'll grow an arena down the diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index d1d6853..aa4a76a 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -9,6 +9,14 @@ * - Can use efficient OS-level zeroing primitives for demand-filled pages. */ +/* How "eager" decay/purging should be. */ +enum pac_decay_purge_setting_e { + PAC_DECAY_PURGE_ALWAYS, + PAC_DECAY_PURGE_NEVER, + PAC_DECAY_PURGE_ON_EPOCH_ADVANCE +}; +typedef enum pac_decay_purge_setting_e pac_decay_purge_setting_t; + typedef struct pac_decay_stats_s pac_decay_stats_t; struct pac_decay_stats_s { /* Total number of purge sweeps. */ diff --git a/src/arena.c b/src/arena.c index 619060f..95dea18 100644 --- a/src/arena.c +++ b/src/arena.c @@ -410,14 +410,14 @@ arena_muzzy_decay_ms_get(arena_t *arena) { * specifically requested it), should we purge ourselves, or wait for the * background thread to get to it. */ -static pa_decay_purge_setting_t +static pac_decay_purge_setting_t arena_decide_unforced_decay_purge_setting(bool is_background_thread) { if (is_background_thread) { - return PA_DECAY_PURGE_ALWAYS; + return PAC_DECAY_PURGE_ALWAYS; } else if (!is_background_thread && background_thread_enabled()) { - return PA_DECAY_PURGE_NEVER; + return PAC_DECAY_PURGE_NEVER; } else { - return PA_DECAY_PURGE_ON_EPOCH_ADVANCE; + return PAC_DECAY_PURGE_ON_EPOCH_ADVANCE; } } @@ -440,7 +440,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, nstime_t cur_time; nstime_init_update(&cur_time); decay_reinit(decay, &cur_time, decay_ms); - pa_decay_purge_setting_t decay_purge = + pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting( /* is_background_thread */ false); pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, @@ -497,7 +497,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - pa_decay_purge_setting_t decay_purge = + pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting(is_background_thread); bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, decay_purge); diff --git a/src/pa.c b/src/pa.c index e8c88a0..66a9fbc 100644 --- a/src/pa.c +++ b/src/pa.c @@ -403,7 +403,7 @@ pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pa_decay_purge_setting_t decay_purge_setting) { + pac_decay_purge_setting_t decay_purge_setting) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ @@ -429,9 +429,9 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, size_t npages_current = ecache_npages_get(ecache); bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, npages_current); - if (decay_purge_setting == PA_DECAY_PURGE_ALWAYS + if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS || (epoch_advanced && decay_purge_setting - == PA_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, npages_current, npages_limit); diff --git a/src/pac.c b/src/pac.c index 8ff6f1c..e2e6b58 100644 --- a/src/pac.c +++ b/src/pac.c @@ -76,4 +76,3 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, return false; } - -- cgit v0.12 From 6a2774719fe6b4cdae35c4a087afc2ef7f8c9110 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 3 Jun 2020 14:43:28 -0700 Subject: PA->PAC: Move in decay functions. --- include/jemalloc/internal/pa.h | 33 ------- include/jemalloc/internal/pac.h | 36 +++++++- src/arena.c | 14 +-- src/pa.c | 2 +- src/pac.c | 187 +++++++++++++++++++++++++++++++++++++++- test/unit/pa.c | 2 +- 6 files changed, 230 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e5a46f9..a2fa0ba 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -137,39 +137,6 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -/* - * All purging functions require holding decay->mtx. This is one of the few - * places external modules are allowed to peek inside pa_shard_t internals. - */ - -/* - * Decays the number of pages currently in the ecache. This might not leave the - * ecache empty if other threads are inserting dirty objects into it - * concurrently with the call. - */ -void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); -/* - * Updates decay settings for the current time, and conditionally purges in - * response (depending on decay_purge_setting). Returns whether or not the - * epoch advanced. - */ -bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting); - -/* - * Gets / sets the maximum amount that we'll grow an arena down the - * grow-retained pathways (unless forced to by an allocaction request). - * - * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't - * care about the previous value. - * - * Returns true on error (if the new limit is not valid). 
- */ -bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, - size_t *old_limit, size_t *new_limit); - /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index aa4a76a..6c3721f 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -81,6 +81,7 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; + base_t *base; emap_t *emap; edata_cache_t *edata_cache; @@ -103,7 +104,7 @@ struct pac_s { atomic_zu_t extent_sn_next; }; -bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, +bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, @@ -126,4 +127,37 @@ pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); } +/* + * All purging functions require holding decay->mtx. This is one of the few + * places external modules are allowed to peek inside pa_shard_t internals. + */ + +/* + * Decays the number of pages currently in the ecache. This might not leave the + * ecache empty if other threads are inserting dirty objects into it + * concurrently with the call. + */ +void pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* + * Updates decay settings for the current time, and conditionally purges in + * response (depending on decay_purge_setting). Returns whether or not the + * epoch advanced. + */ +bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_purge_setting_t decay_purge_setting); + +/* + * Gets / sets the maximum amount that we'll grow an arena down the + * grow-retained pathways (unless forced to by an allocaction request). + * + * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't + * care about the previous value. + * + * Returns true on error (if the new limit is not valid). 
+ */ +bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 95dea18..8263d8e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -443,8 +443,8 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting( /* is_background_thread */ false); - pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, - decay_purge); + pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, + ecache, decay_purge); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -472,8 +472,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, - /* fully_decay */ all); + pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, + ecache, /* fully_decay */ all); malloc_mutex_unlock(tsdn, &decay->mtx); /* * The previous pa_decay_all call may not have actually decayed @@ -499,7 +499,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting(is_background_thread); - bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, + bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, decay_purge); size_t npages_new; if (epoch_advanced) { @@ -1401,8 +1401,8 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - return pa_shard_retain_grow_limit_get_set(tsd_tsdn(tsd), - &arena->pa_shard, old_limit, new_limit); + return pac_retain_grow_limit_get_set(tsd_tsdn(tsd), + &arena->pa_shard.pac, old_limit, new_limit); } unsigned diff --git a/src/pa.c b/src/pa.c index 66a9fbc..43dc318 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,7 +29,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, + if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; diff --git a/src/pac.c b/src/pac.c index e2e6b58..5ed1151 100644 --- a/src/pac.c +++ b/src/pac.c @@ -3,10 +3,16 @@ #include "jemalloc/internal/pac.h" +static ehooks_t * +pac_ehooks_get(pac_t *pac) { + return base_ehooks_get(pac->base); +} + bool -pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, +pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { + unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -45,6 +51,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, return true; } + pac->base = base; pac->emap = emap; pac->edata_cache = edata_cache; pac->stats = pac_stats; @@ -76,3 +83,181 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, return false; } + +static size_t +pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, 
edata_list_t *result) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + ehooks_t *ehooks = pac_ehooks_get(pac); + + /* Stash extents according to npages_limit. */ + size_t nstashed = 0; + while (nstashed < npages_decay_max) { + edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache, + npages_limit); + if (edata == NULL) { + break; + } + edata_list_append(result, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; + } + return nstashed; +} + +static size_t +pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents) { + bool err; + + size_t nmadvise = 0; + size_t nunmapped = 0; + size_t npurged = 0; + + ehooks_t *ehooks = pac_ehooks_get(pac); + + bool try_muzzy = !fully_decay && pac_muzzy_decay_ms_get(pac) != 0; + + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; edata = edata_list_first(decay_extents)) { + edata_list_remove(decay_extents, edata); + + size_t size = edata_size_get(edata); + size_t npages = size >> LG_PAGE; + + nmadvise++; + npurged += npages; + + switch (ecache->state) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + if (try_muzzy) { + err = extent_purge_lazy_wrapper(tsdn, ehooks, + edata, /* offset */ 0, size); + if (!err) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_muzzy, edata); + break; + } + } + JEMALLOC_FALLTHROUGH; + case extent_state_muzzy: + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + nunmapped += npages; + break; + case extent_state_retained: + default: + not_reached(); + } + } + + if (config_stats) { + LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->nmadvise, nmadvise); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->purged, npurged); + LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); + atomic_fetch_sub_zu(&pac->stats->pac_mapped, + nunmapped << LG_PAGE, ATOMIC_RELAXED); + } + + return npurged; +} + +/* + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. 
+ */ +static void +pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 1); + + if (decay->purging || npages_decay_max == 0) { + return; + } + decay->purging = true; + malloc_mutex_unlock(tsdn, &decay->mtx); + + edata_list_t decay_extents; + edata_list_init(&decay_extents); + size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, + npages_decay_max, &decay_extents); + if (npurge != 0) { + size_t npurged = pac_decay_stashed(tsdn, pac, decay, + decay_stats, ecache, fully_decay, &decay_extents); + assert(npurged == npurge); + } + + malloc_mutex_lock(tsdn, &decay->mtx); + decay->purging = false; +} + +void +pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, fully_decay, + /* npages_limit */ 0, ecache_npages_get(ecache)); +} + +static void +pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit) { + if (current_npages > npages_limit) { + pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, + /* fully_decay */ false, npages_limit, + current_npages - npages_limit); + } +} + +bool +pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_purge_setting_t decay_purge_setting) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + /* Purge all or nothing if the option is disabled. */ + ssize_t decay_ms = decay_ms_read(decay); + if (decay_ms <= 0) { + if (decay_ms == 0) { + pac_decay_to_limit(tsdn, pac, decay, decay_stats, + ecache, /* fully_decay */ false, + /* npages_limit */ 0, ecache_npages_get(ecache)); + } + return false; + } + + /* + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). + */ + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS + || (epoch_advanced && decay_purge_setting + == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + size_t npages_limit = decay_npages_limit_get(decay); + pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache, + npages_current, npages_limit); + } + + return epoch_advanced; +} diff --git a/test/unit/pa.c b/test/unit/pa.c index 17889b5..63cd976 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -88,7 +88,7 @@ do_alloc_free_purge(void *arg) { &generated_dirty); malloc_mutex_lock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); - pa_decay_all(TSDN_NULL, &test_data->shard, + pac_decay_all(TSDN_NULL, &test_data->shard.pac, &test_data->shard.pac.decay_dirty, &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); -- cgit v0.12 From 471eb5913cfdef1d102219ddab683066e3462f43 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 3 Jun 2020 18:30:33 -0700 Subject: PAC: Move in decay rate setting. 
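
Decay-rate get/set now flows through the page allocator layers: the old
arena_{dirty,muzzy}_decay_ms_{get,set} entry points are replaced by
state-parameterized arena_decay_ms_{get,set}, which route through
pa_decay_ms_{get,set} and, per the new pac.h interface, pac_decay_ms_set/get,
and pac_decay_purge_setting_t is renamed to pac_purge_eagerness_t.

A minimal caller sketch of the resulting interface follows; it assumes the
jemalloc-internal types and the declarations introduced by this patch, and the
wrapper function below is hypothetical, shown only to illustrate the call
chain rather than being part of the patch:

/*
 * Set the dirty-page decay time for an arena.  arena_decay_ms_set() picks a
 * pac_purge_eagerness_t and forwards to pa_decay_ms_set(tsdn,
 * &arena->pa_shard, extent_state_dirty, decay_ms, eagerness), which lands in
 * the shard's pac_t.
 */
static bool
example_set_dirty_decay(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) {
	/* Returns true on error, matching the arena_decay_ms_set() contract. */
	return arena_decay_ms_set(tsdn, arena, extent_state_dirty, decay_ms);
}

Collapsing the dirty/muzzy variants into one state-keyed entry point keeps the
purge-eagerness decision in arena code while the pac module owns the decay
state itself.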
--- include/jemalloc/internal/arena_externs.h | 7 ++- include/jemalloc/internal/pa.h | 6 ++- include/jemalloc/internal/pac.h | 25 ++++----- src/arena.c | 86 +++++++++---------------------- src/ctl.c | 10 ++-- src/extent.c | 4 +- src/pa.c | 66 +++++------------------- src/pac.c | 70 +++++++++++++++++++++++-- test/unit/pa.c | 1 - 9 files changed, 126 insertions(+), 149 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e6e9a0b..674c98f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -40,10 +40,9 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldsize); -ssize_t arena_dirty_decay_ms_get(arena_t *arena); -bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); -ssize_t arena_muzzy_decay_ms_get(arena_t *arena); -bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, + ssize_t decay_ms); +ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a2fa0ba..4bdd8ac 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -90,7 +90,7 @@ struct pa_shard_s { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pac_muzzy_decay_ms_get(&shard->pac) <= 0; + pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; } static inline ehooks_t * @@ -137,6 +137,10 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); +bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); +ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 6c3721f..de01c51 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -10,12 +10,12 @@ */ /* How "eager" decay/purging should be. 
*/ -enum pac_decay_purge_setting_e { - PAC_DECAY_PURGE_ALWAYS, - PAC_DECAY_PURGE_NEVER, - PAC_DECAY_PURGE_ON_EPOCH_ADVANCE +enum pac_purge_eagerness_e { + PAC_PURGE_ALWAYS, + PAC_PURGE_NEVER, + PAC_PURGE_ON_EPOCH_ADVANCE }; -typedef enum pac_decay_purge_setting_e pac_decay_purge_setting_t; +typedef enum pac_purge_eagerness_e pac_purge_eagerness_t; typedef struct pac_decay_stats_s pac_decay_stats_t; struct pac_decay_stats_s { @@ -112,16 +112,6 @@ bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, pac_estats_t *estats_out, size_t *resident); -static inline ssize_t -pac_dirty_decay_ms_get(pac_t *pac) { - return decay_ms_read(&pac->decay_dirty); -} - -static inline ssize_t -pac_muzzy_decay_ms_get(pac_t *pac) { - return decay_ms_read(&pac->decay_muzzy); -} - static inline size_t pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); @@ -146,7 +136,7 @@ void pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, */ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting); + pac_purge_eagerness_t eagerness); /* * Gets / sets the maximum amount that we'll grow an arena down the @@ -160,4 +150,7 @@ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); +ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 8263d8e..72fa228 100644 --- a/src/arena.c +++ b/src/arena.c @@ -70,8 +70,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *dirty_decay_ms = arena_dirty_decay_ms_get(arena); - *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); + *dirty_decay_ms = arena_decay_ms_get(arena, extent_state_dirty); + *muzzy_decay_ms = arena_decay_ms_get(arena, extent_state_muzzy); pa_shard_basic_stats_merge(&arena->pa_shard, nactive, ndirty, nmuzzy); } @@ -189,7 +189,7 @@ void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (arena_dirty_decay_ms_get(arena) == 0) { + if (arena_decay_ms_get(arena, extent_state_dirty) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { arena_background_thread_inactivity_check(tsdn, arena, false); @@ -395,77 +395,37 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } -ssize_t -arena_dirty_decay_ms_get(arena_t *arena) { - return pac_dirty_decay_ms_get(&arena->pa_shard.pac); -} - -ssize_t -arena_muzzy_decay_ms_get(arena_t *arena) { - return pac_muzzy_decay_ms_get(&arena->pa_shard.pac); -} - /* * In situations where we're not forcing a decay (i.e. because the user * specifically requested it), should we purge ourselves, or wait for the * background thread to get to it. 
*/ -static pac_decay_purge_setting_t -arena_decide_unforced_decay_purge_setting(bool is_background_thread) { +static pac_purge_eagerness_t +arena_decide_unforced_purge_eagerness(bool is_background_thread) { if (is_background_thread) { - return PAC_DECAY_PURGE_ALWAYS; + return PAC_PURGE_ALWAYS; } else if (!is_background_thread && background_thread_enabled()) { - return PAC_DECAY_PURGE_NEVER; + return PAC_PURGE_NEVER; } else { - return PAC_DECAY_PURGE_ON_EPOCH_ADVANCE; + return PAC_PURGE_ON_EPOCH_ADVANCE; } } -static bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { - if (!decay_ms_valid(decay_ms)) { - return true; - } - - malloc_mutex_lock(tsdn, &decay->mtx); - /* - * Restart decay backlog from scratch, which may cause many dirty pages - * to be immediately purged. It would conceptually be possible to map - * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_ms changes are intended to be - * infrequent, either between the {-1, 0, >0} states, or a one-time - * arbitrary change during initial arena configuration. - */ - nstime_t cur_time; - nstime_init_update(&cur_time); - decay_reinit(decay, &cur_time, decay_ms); - pac_decay_purge_setting_t decay_purge = - arena_decide_unforced_decay_purge_setting( - /* is_background_thread */ false); - pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, - ecache, decay_purge); - malloc_mutex_unlock(tsdn, &decay->mtx); - - return false; -} - bool -arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.pac.stats->decay_dirty, - &arena->pa_shard.pac.ecache_dirty, decay_ms); + pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( + /* is_background_thread */ false); + return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms, + eagerness); } -bool -arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, - ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.pac.stats->decay_muzzy, - &arena->pa_shard.pac.ecache_muzzy, decay_ms); +ssize_t +arena_decay_ms_get(arena_t *arena, extent_state_t state) { + return pa_decay_ms_get(&arena->pa_shard, state); } + static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, @@ -497,10 +457,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - pac_decay_purge_setting_t decay_purge = - arena_decide_unforced_decay_purge_setting(is_background_thread); + pac_purge_eagerness_t eagerness = + arena_decide_unforced_purge_eagerness(is_background_thread); bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, - decay, decay_stats, ecache, decay_purge); + decay, decay_stats, ecache, eagerness); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ @@ -1546,10 +1506,12 @@ arena_choose_huge(tsd_t *tsd) { * expected for huge allocations. 
*/ if (arena_dirty_decay_ms_default_get() > 0) { - arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_dirty, 0); } if (arena_muzzy_decay_ms_default_get() > 0) { - arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_muzzy, 0); } } diff --git a/src/ctl.c b/src/ctl.c index 8b4b764..62a82a2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2430,10 +2430,10 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EFAULT; goto label_return; } + extent_state_t state = dirty ? extent_state_dirty : extent_state_muzzy; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = dirty ? arena_dirty_decay_ms_get(arena) : - arena_muzzy_decay_ms_get(arena); + size_t oldval = arena_decay_ms_get(arena, state); READ(oldval, ssize_t); } if (newp != NULL) { @@ -2452,9 +2452,9 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } } - if (dirty ? arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd), - arena, *(ssize_t *)newp)) { + + if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } diff --git a/src/extent.c b/src/extent.c index 87d6a9a..98db40e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -55,8 +55,8 @@ extent_sn_next(pac_t *pac) { static inline bool extent_may_force_decay(pac_t *pac) { - return !(pac_dirty_decay_ms_get(pac) == -1 - || pac_muzzy_decay_ms_get(pac) == -1); + return !(pac_decay_ms_get(pac, extent_state_dirty) == -1 + || pac_decay_ms_get(pac, extent_state_muzzy) == -1); } static bool diff --git a/src/pa.c b/src/pa.c index 43dc318..444ea5b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -78,9 +78,9 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { } } -static bool +static inline bool pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pac_muzzy_decay_ms_get(&shard->pac) != 0; + return pac_decay_ms_get(&shard->pac, extent_state_muzzy) != 0; } static edata_t * @@ -389,60 +389,20 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, /* npages_limit */ 0, ecache_npages_get(ecache)); } -static void -pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit) { - if (current_npages > npages_limit) { - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, - /* fully_decay */ false, npages_limit, - current_npages - npages_limit); - } -} - -bool -pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = decay_ms_read(decay); - if (decay_ms <= 0) { - if (decay_ms == 0) { - pa_decay_to_limit(tsdn, shard, decay, decay_stats, - ecache, /* fully_decay */ false, - /* npages_limit */ 0, ecache_npages_get(ecache)); - } - return false; - } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances, or - * being triggered by background threads (scheduled event). 
- */ - nstime_t time; - nstime_init_update(&time); - size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); - if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS - || (epoch_advanced && decay_purge_setting - == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { - size_t npages_limit = decay_npages_limit_get(decay); - pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, - npages_current, npages_limit); - } - - return epoch_advanced; -} - bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, new_limit); } + +bool +pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness) { + return pac_decay_ms_set(tsdn, &shard->pac, state, decay_ms, eagerness); +} + +ssize_t +pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { + return pac_decay_ms_get(&shard->pac, state); +} diff --git a/src/pac.c b/src/pac.c index 5ed1151..bc9f743 100644 --- a/src/pac.c +++ b/src/pac.c @@ -8,6 +8,27 @@ pac_ehooks_get(pac_t *pac) { return base_ehooks_get(pac->base); } +static inline void +pac_decay_data_get(pac_t *pac, extent_state_t state, + decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { + switch(state) { + case extent_state_dirty: + *r_decay = &pac->decay_dirty; + *r_decay_stats = &pac->stats->decay_dirty; + *r_ecache = &pac->ecache_dirty; + return; + case extent_state_muzzy: + *r_decay = &pac->decay_muzzy; + *r_decay_stats = &pac->stats->decay_muzzy; + *r_ecache = &pac->ecache_muzzy; + return; + default: + unreachable(); + } +} + + + bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, @@ -117,7 +138,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, ehooks_t *ehooks = pac_ehooks_get(pac); - bool try_muzzy = !fully_decay && pac_muzzy_decay_ms_get(pac) != 0; + bool try_muzzy = !fully_decay + && pac_decay_ms_get(pac, extent_state_muzzy) != 0; for (edata_t *edata = edata_list_first(decay_extents); edata != NULL; edata = edata_list_first(decay_extents)) { @@ -225,7 +247,7 @@ pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting) { + pac_purge_eagerness_t eagerness) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ @@ -251,9 +273,8 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, size_t npages_current = ecache_npages_get(ecache); bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, npages_current); - if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS - || (epoch_advanced && decay_purge_setting - == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + if (eagerness == PAC_PURGE_ALWAYS + || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache, npages_current, npages_limit); @@ -261,3 +282,42 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, return epoch_advanced; } + +bool +pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness) { + decay_t *decay; + pac_decay_stats_t *decay_stats; + ecache_t *ecache; + pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); + + if (!decay_ms_valid(decay_ms)) { + return true; + } + + malloc_mutex_lock(tsdn, &decay->mtx); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_ms changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. + */ + nstime_t cur_time; + nstime_init_update(&cur_time); + decay_reinit(decay, &cur_time, decay_ms); + pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return false; +} + +ssize_t +pac_decay_ms_get(pac_t *pac, extent_state_t state) { + decay_t *decay; + pac_decay_stats_t *decay_stats; + ecache_t *ecache; + pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); + return decay_ms_read(decay); +} diff --git a/test/unit/pa.c b/test/unit/pa.c index 63cd976..3a91023 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -107,7 +107,6 @@ TEST_BEGIN(test_alloc_free_purge_thds) { for (int i = 0; i < 4; i++) { thd_join(thds[i], NULL); } - } TEST_END -- cgit v0.12 From cbf096b05ee1b21ce4244f04870083c63798ad64 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 4 Jun 2020 12:44:50 -0700 Subject: Arena: remove redundant bg inactivity check. --- src/arena.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 72fa228..2bf02de 100644 --- a/src/arena.c +++ b/src/arena.c @@ -425,7 +425,6 @@ arena_decay_ms_get(arena_t *arena, extent_state_t state) { return pa_decay_ms_get(&arena->pa_shard, state); } - static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, @@ -435,21 +434,6 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, /* fully_decay */ all); malloc_mutex_unlock(tsdn, &decay->mtx); - /* - * The previous pa_decay_all call may not have actually decayed - * all pages, if new pages were added concurrently with the - * purge. - * - * I don't think we need an activity check for that case (some - * other thread must be deallocating, and they should do one), - * but we do one anyways. This line comes out of a refactoring - * diff in which the check was pulled out of the callee, and so - * an extra redundant check minimizes the change. We should - * reevaluate. 
- */ - assert(!is_background_thread); - arena_background_thread_inactivity_check(tsdn, arena, - /* is_background_thread */ false); return false; } -- cgit v0.12 From 6041aaba9742c792cfa1d9ddbede6c646dd92d33 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 10 Jun 2020 17:42:49 -0700 Subject: PA -> PAC: Move in destruction functions. --- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pac.h | 4 ++++ src/arena.c | 2 +- src/pa.c | 20 ++------------------ src/pac.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4bdd8ac..b3fc8e2 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -113,7 +113,7 @@ void pa_shard_reset(pa_shard_t *shard); * decaying all active, dirty, and muzzy extents to the retained state, as the * last step in destroying the shard. */ -void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index de01c51..302ac07 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -153,4 +153,8 @@ bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); + +void pac_reset(tsdn_t *tsdn, pac_t *pac); +void pac_destroy(tsdn_t *tsdn, pac_t *pac); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 2bf02de..46da385 100644 --- a/src/arena.c +++ b/src/arena.c @@ -645,7 +645,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * extents, so only retained extents may remain and it's safe to call * pa_shard_destroy_retained. */ - pa_shard_destroy_retained(tsd_tsdn(tsd), &arena->pa_shard); + pa_shard_destroy(tsd_tsdn(tsd), &arena->pa_shard); /* * Remove the arena pointer from the arenas array. We rely on the fact diff --git a/src/pa.c b/src/pa.c index 444ea5b..6a3db3c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -58,24 +58,8 @@ pa_shard_reset(pa_shard_t *shard) { } void -pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { - assert(ecache_npages_get(&shard->pac.ecache_dirty) == 0); - assert(ecache_npages_get(&shard->pac.ecache_muzzy) == 0); - /* - * Iterate over the retained extents and destroy them. This gives the - * extent allocator underlying the extent hooks an opportunity to unmap - * all retained memory without having to keep its own metadata - * structures. In practice, virtual memory for dss-allocated extents is - * leaked here, so best practice is to avoid dss for arenas to be - * destroyed, or provide custom extent hooks that track retained - * dss-based extents for later reuse. 
- */ - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata; - while ((edata = ecache_evict(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, &shard->pac, ehooks, edata); - } +pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { + pac_destroy(tsdn, &shard->pac); } static inline bool diff --git a/src/pac.c b/src/pac.c index bc9f743..ed17a2f 100644 --- a/src/pac.c +++ b/src/pac.c @@ -321,3 +321,34 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) { pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); return decay_ms_read(decay); } + +void +pac_reset(tsdn_t *tsdn, pac_t *pac) { + /* + * No-op for now; purging is still done at the arena-level. It should + * get moved in here, though. + */ + (void)tsdn; + (void)pac; +} + +void +pac_destroy(tsdn_t *tsdn, pac_t *pac) { + assert(ecache_npages_get(&pac->ecache_dirty) == 0); + assert(ecache_npages_get(&pac->ecache_muzzy) == 0); + /* + * Iterate over the retained extents and destroy them. This gives the + * extent allocator underlying the extent hooks an opportunity to unmap + * all retained memory without having to keep its own metadata + * structures. In practice, virtual memory for dss-allocated extents is + * leaked here, so best practice is to avoid dss for arenas to be + * destroyed, or provide custom extent hooks that track retained + * dss-based extents for later reuse. + */ + ehooks_t *ehooks = pac_ehooks_get(pac); + edata_t *edata; + while ((edata = ecache_evict(tsdn, pac, ehooks, + &pac->ecache_retained, 0)) != NULL) { + extent_destroy_wrapper(tsdn, pac, ehooks, edata); + } +} -- cgit v0.12 From 6107857b7b40cd3d5c64053aeaf44e275374e9e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 11:53:30 -0700 Subject: PA->PAC: Move in PAI implementation. --- include/jemalloc/internal/pa.h | 7 +- include/jemalloc/internal/pac.h | 7 ++ src/pa.c | 254 +--------------------------------------- src/pac.c | 131 +++++++++++++++++++-- 4 files changed, 133 insertions(+), 266 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b3fc8e2..f6d0a7c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -66,12 +66,7 @@ struct pa_shard_s { */ atomic_zu_t nactive; - /* - * An interface for page allocation from the ecache framework (i.e. a - * cascade of ecache_dirty, ecache_muzzy, ecache_retained). Right now - * this is the *only* pai, but we'll soon grow another. - */ - pai_t ecache_pai; + /* Allocates from a PAC. */ pac_t pac; /* The source of edata_t objects. */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 302ac07..2d02bda 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/pai.h" + /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. @@ -72,6 +74,11 @@ struct pac_stats_s { typedef struct pac_s pac_t; struct pac_s { /* + * Must be the first member (we convert it to a PAC given only a + * pointer). The handle to the allocation interface. + */ + pai_t pai; + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. 
* diff --git a/src/pa.c b/src/pa.c index 6a3db3c..f068fd9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,14 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -static edata_t *ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); -static bool ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); -static bool ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); - static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -44,11 +36,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->emap = emap; shard->base = base; - shard->ecache_pai.alloc = &ecache_pai_alloc; - shard->ecache_pai.expand = &ecache_pai_expand; - shard->ecache_pai.shrink = &ecache_pai_shrink; - shard->ecache_pai.dalloc = &ecache_pai_dalloc; - return false; } @@ -62,43 +49,13 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); } -static inline bool -pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pac_decay_ms_get(&shard->pac, extent_state_muzzy) != 0; -} - -static edata_t * -ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_dirty, NULL, size, alignment, zero); - - if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_muzzy, NULL, size, alignment, zero); - } - if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_retained, NULL, size, alignment, zero); - if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, - ATOMIC_RELAXED); - } - } - return edata; -} - edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = pai_alloc(tsdn, &shard->ecache_pai, size, alignment, + edata_t *edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); if (edata != NULL) { @@ -113,48 +70,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, return edata; } -static bool -ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - void *trail_begin = edata_past_get(edata); - - size_t mapped_add = 0; - size_t expand_amount = new_size - old_size; - - if (ehooks_merge_will_fail(ehooks)) { - return true; - } - edata_t *trail = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); - if (trail == NULL) { - trail = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, - zero); - } - if (trail == NULL) { - trail = ecache_alloc_grow(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_retained, trail_begin, expand_amount, - PAGE, zero); - mapped_add = 
expand_amount; - } - if (trail == NULL) { - return true; - } - if (extent_merge_wrapper(tsdn, &shard->pac, ehooks, edata, trail)) { - extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, trail); - return true; - } - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, mapped_add, - ATOMIC_RELAXED); - } - return false; -} - bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool zero) { @@ -164,7 +79,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t expand_amount = new_size - old_size; - bool error = pai_expand(tsdn, &shard->ecache_pai, edata, old_size, + bool error = pai_expand(tsdn, &shard->pac.pai, edata, old_size, new_size, zero); if (error) { return true; @@ -176,30 +91,6 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } -static bool -ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - size_t shrink_amount = old_size - new_size; - - - if (ehooks_split_will_fail(ehooks)) { - return true; - } - - edata_t *trail = extent_split_wrapper(tsdn, &shard->pac, ehooks, edata, - new_size, shrink_amount); - if (trail == NULL) { - return true; - } - ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, - trail); - return false; -} - bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool *generated_dirty) { @@ -209,7 +100,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; *generated_dirty = false; - bool error = pai_shrink(tsdn, &shard->ecache_pai, edata, old_size, + bool error = pai_shrink(tsdn, &shard->pac.pai, edata, old_size, new_size); if (error) { return true; @@ -222,15 +113,6 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } -static void -ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, - edata); -} - void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { @@ -241,138 +123,10 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - pai_dalloc(tsdn, &shard->ecache_pai, edata); + pai_dalloc(tsdn, &shard->pac.pai, edata); *generated_dirty = true; } -static size_t -pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, - edata_list_inactive_t *result) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - - /* Stash extents according to npages_limit. 
*/ - size_t nstashed = 0; - while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, &shard->pac, ehooks, ecache, - npages_limit); - if (edata == NULL) { - break; - } - edata_list_inactive_append(result, edata); - nstashed += edata_size_get(edata) >> LG_PAGE; - } - return nstashed; -} - -static size_t -pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_inactive_t *decay_extents) { - bool err; - - size_t nmadvise = 0; - size_t nunmapped = 0; - size_t npurged = 0; - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - - bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); - - for (edata_t *edata = edata_list_inactive_first(decay_extents); - edata != NULL; edata = edata_list_inactive_first(decay_extents)) { - edata_list_inactive_remove(decay_extents, edata); - - size_t size = edata_size_get(edata); - size_t npages = size >> LG_PAGE; - - nmadvise++; - npurged += npages; - - switch (ecache->state) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - if (try_muzzy) { - err = extent_purge_lazy_wrapper(tsdn, ehooks, - edata, /* offset */ 0, size); - if (!err) { - ecache_dalloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_muzzy, edata); - break; - } - } - JEMALLOC_FALLTHROUGH; - case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, edata); - nunmapped += npages; - break; - case extent_state_retained: - default: - not_reached(); - } - } - - if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->npurge, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->nmadvise, nmadvise); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->purged, npurged); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, - nunmapped << LG_PAGE, ATOMIC_RELAXED); - } - - return npurged; -} - -/* - * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper - * bound on number of pages in order to prevent unbounded growth (namely in - * stashed), otherwise unbounded new pages could be added to extents during the - * current decay run, so that the purging thread never finishes. 
- */ -static void -pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - size_t npages_limit, size_t npages_decay_max) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); - - if (decay->purging || npages_decay_max == 0) { - return; - } - decay->purging = true; - malloc_mutex_unlock(tsdn, &decay->mtx); - - edata_list_inactive_t decay_extents; - edata_list_inactive_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, - npages_decay_max, &decay_extents); - if (npurge != 0) { - size_t npurged = pa_decay_stashed(tsdn, shard, decay, - decay_stats, ecache, fully_decay, &decay_extents); - assert(npurged == npurge); - } - - malloc_mutex_lock(tsdn, &decay->mtx); - decay->purging = false; -} - -void -pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, - /* npages_limit */ 0, ecache_npages_get(ecache)); -} - bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { diff --git a/src/pac.c b/src/pac.c index ed17a2f..a437088 100644 --- a/src/pac.c +++ b/src/pac.c @@ -3,6 +3,14 @@ #include "jemalloc/internal/pac.h" +static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); + static ehooks_t * pac_ehooks_get(pac_t *pac) { return base_ehooks_get(pac->base); @@ -27,8 +35,6 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, } } - - bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, @@ -78,9 +84,113 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->stats = pac_stats; pac->stats_mtx = stats_mtx; atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); + + pac->pai.alloc = &pac_alloc_impl; + pac->pai.expand = &pac_expand_impl; + pac->pai.shrink = &pac_shrink_impl; + pac->pai.dalloc = &pac_dalloc_impl; + return false; } +static inline bool +pac_may_have_muzzy(pac_t *pac) { + return pac_decay_ms_get(pac, extent_state_muzzy) != 0; +} + +static edata_t * +pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + NULL, size, alignment, zero); + + if (edata == NULL && pac_may_have_muzzy(pac)) { + edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, + NULL, size, alignment, zero); + } + if (edata == NULL) { + edata = ecache_alloc_grow(tsdn, pac, ehooks, + &pac->ecache_retained, NULL, size, alignment, zero); + if (config_stats && edata != NULL) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, size, + ATOMIC_RELAXED); + } + } + return edata; +} + +static bool +pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + void *trail_begin = 
edata_past_get(edata); + + size_t mapped_add = 0; + size_t expand_amount = new_size - old_size; + + if (ehooks_merge_will_fail(ehooks)) { + return true; + } + edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + trail_begin, expand_amount, PAGE, zero); + if (trail == NULL) { + trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, + trail_begin, expand_amount, PAGE, zero); + } + if (trail == NULL) { + trail = ecache_alloc_grow(tsdn, pac, ehooks, + &pac->ecache_retained, trail_begin, expand_amount, PAGE, + zero); + mapped_add = expand_amount; + } + if (trail == NULL) { + return true; + } + if (extent_merge_wrapper(tsdn, pac, ehooks, edata, trail)) { + extent_dalloc_wrapper(tsdn, pac, ehooks, trail); + return true; + } + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add, + ATOMIC_RELAXED); + } + return false; +} + +static bool +pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + size_t shrink_amount = old_size - new_size; + + + if (ehooks_split_will_fail(ehooks)) { + return true; + } + + edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata, + new_size, shrink_amount); + if (trail == NULL) { + return true; + } + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); + return false; +} + +static void +pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pac_t *pac = (pac_t *)self; + ehooks_t *ehooks = pac_ehooks_get(pac); + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); +} + bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) { @@ -107,7 +217,8 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, static size_t pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { + size_t npages_limit, size_t npages_decay_max, + edata_list_inactive_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pac_ehooks_get(pac); @@ -120,7 +231,7 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(result, edata); + edata_list_inactive_append(result, edata); nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; @@ -129,7 +240,7 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, static size_t pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents) { + edata_list_inactive_t *decay_extents) { bool err; size_t nmadvise = 0; @@ -141,9 +252,9 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - edata_list_remove(decay_extents, edata); + for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != + NULL; edata = edata_list_inactive_first(decay_extents)) { + edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); size_t npages = size >> LG_PAGE; @@ -211,8 +322,8 @@ pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - edata_list_t decay_extents; - edata_list_init(&decay_extents); + 
edata_list_inactive_t decay_extents; + edata_list_inactive_init(&decay_extents); size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { -- cgit v0.12 From e6cb7a1c9b31de3c6eca367d9164a1896bbb60ae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 7 Jul 2020 13:33:30 -0700 Subject: Shorten wait time for peak events --- src/peak_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peak_event.c b/src/peak_event.c index ffb061b..79d91e0 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -5,12 +5,12 @@ #include "jemalloc/internal/peak_event.h" /* - * Update every 100k by default. We're not exposing this as a configuration + * Update every 64K by default. We're not exposing this as a configuration * option for now; we don't want to bind ourselves too tightly to any particular * performance requirements for small values, or guarantee that we'll even be * able to provide fine-grained accuracy. */ -#define PEAK_EVENT_WAIT (100 * 1024) +#define PEAK_EVENT_WAIT (64 * 1024) /* Update the peak with current tsd state. */ void -- cgit v0.12 From 4258402047a1b1c9b78ff12dcb26bd869f6ae8cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 6 Jul 2020 15:48:15 -0700 Subject: Corrections for prof_log_start() --- src/prof_log.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index b32d6f6..3a653fb 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -87,7 +87,7 @@ struct prof_alloc_node_s { }; /* - * Created on the first call to prof_log_start and deleted on prof_log_stop. + * Created on the first call to prof_try_log and deleted on prof_log_stop. * These are the backtraces and threads that have already been logged by an * allocation. 
*/ @@ -406,7 +406,7 @@ prof_log_dummy_set(bool new_value) { bool prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { + if (!opt_prof) { return true; } @@ -429,7 +429,7 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { } if (!ret) { - nstime_update(&log_start_timestamp); + nstime_prof_init_update(&log_start_timestamp); } malloc_mutex_unlock(tsdn, &log_mtx); @@ -573,10 +573,9 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); - const char *res_type = - prof_time_res_mode_names[opt_prof_time_res]; - emitter_json_kv(emitter, "prof_time_resolution", - emitter_type_string, &res_type); + const char *res_type = prof_time_res_mode_names[opt_prof_time_res]; + emitter_json_kv(emitter, "prof_time_resolution", emitter_type_string, + &res_type); int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); @@ -673,6 +672,11 @@ prof_log_stop(tsdn_t *tsdn) { #undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + if (opt_prof_log) { prof_log_start(tsd_tsdn(tsd), NULL); } @@ -683,26 +687,9 @@ bool prof_log_init(tsd_t *tsd) { if (opt_abort) { abort(); } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { - return true; - } - - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { return true; } - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp)) { - return true; - } - - nstime_init_zero(&log_start_timestamp); - - log_tables_initialized = true; return false; } -- cgit v0.12 From f5fb4e5a970077e308d7e4e3f1cbbec4cf76a8d9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 14:20:38 -0700 Subject: Modify mallctl output length when needed This is the only reason why `oldlenp` was designed to be in the form of a pointer. --- doc/jemalloc.xml.in | 3 ++- src/ctl.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5472294..19afe36 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3608,7 +3608,8 @@ MAPPED_LIBRARIES: NULL, and newlen is too large or too small. Alternatively, *oldlenp is too large or too small; in this case as much data as possible - are read despite the error. + are read despite the error, with the amount of data read being + recorded in *oldlenp. ENOENT diff --git a/src/ctl.c b/src/ctl.c index 62a82a2..92e9f51 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1494,6 +1494,7 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { size_t copylen = (sizeof(t) <= *oldlenp) \ ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&(v), copylen); \ + *oldlenp = copylen; \ ret = EINVAL; \ goto label_return; \ } \ -- cgit v0.12 From fb347dc6186d5b1747f66075c9209c673d23720b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 14:46:43 -0700 Subject: Verify output space before doing heavy work in mallctl --- doc/jemalloc.xml.in | 21 +++++++++++++++++++-- src/ctl.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 19afe36..f283fd3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1760,7 +1760,16 @@ malloc_conf = "xmalloc:true";]]> automatically managed one that is used by default. 
Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds. + + + If the amount of space supplied for storing the thread-specific + cache identifier does not equal + sizeof(unsigned), no + thread-specific cache will be created, no data will be written to the + space pointed by oldp, and + *oldlenp will be set to 0. + @@ -2300,7 +2309,14 @@ struct extent_hooks_s { Explicitly create a new arena outside the range of automatically managed arenas, with optionally specified extent hooks, - and return the new arena index. + and return the new arena index. + + If the amount of space supplied for storing the arena index does + not equal sizeof(unsigned), no + arena will be created, no data will be written to the space pointed by + oldp, and *oldlenp will + be set to 0. + @@ -3607,7 +3623,8 @@ MAPPED_LIBRARIES: newp is not NULL, and newlen is too large or too small. Alternatively, *oldlenp - is too large or too small; in this case as much data as possible + is too large or too small; when it happens, except for a very few + cases explicitly documented otherwise, as much data as possible are read despite the error, with the amount of data read being recorded in *oldlenp. diff --git a/src/ctl.c b/src/ctl.c index 92e9f51..9cfb258 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1488,6 +1488,15 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Verify that the space provided is enough. */ +#define VERIFY_READ(t) do { \ + if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ + *oldlenp = 0; \ + ret = EINVAL; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1559,8 +1568,8 @@ label_return: \ #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1602,8 +1611,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1621,8 +1630,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1637,8 +1646,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -2103,6 +2112,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; READONLY(); + VERIFY_READ(unsigned); if (tcaches_create(tsd, b0get(), &tcache_ind)) { ret = EFAULT; goto label_return; @@ -2608,10 +2618,6 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (*oldlenp != sizeof(unsigned)) { - ret = 
EINVAL; - goto label_return; - } narenas = ctl_arenas->narenas; READ(narenas, unsigned); @@ -2702,6 +2708,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + VERIFY_READ(unsigned); extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; WRITE(extent_hooks, extent_hooks_t *); if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { @@ -2731,12 +2738,14 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - if (edata == NULL) + if (edata == NULL) { goto label_return; + } arena = arena_get_from_edata(edata); - if (arena == NULL) + if (arena == NULL) { goto label_return; + } arena_ind = arena_ind_get(arena); READ(arena_ind, unsigned); -- cgit v0.12 From 786a27b9e5dfb732bc1d893cc236354c225c8f1c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jul 2020 13:24:38 -0700 Subject: CI: Update keyring. --- .appveyor.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 90b0368..f74f099 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,8 +30,10 @@ environment: install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz + - pacman --noconfirm -U msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz --nodeps - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -Suy mingw-w64-%CPU%-make + - pacman --noconfirm -S mingw-w64-%CPU%-make build_script: - bash -c "autoconf" -- cgit v0.12 From 1ed0288d9c471771eba98ad5c3f6981fa922e7c4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jul 2020 08:07:12 -0700 Subject: bit_util: Change ffs functions indexing. Making these 0-based instead of 1-based makes calling code simpler and will be more consistent with functions introduced in subsequent diffs. --- include/jemalloc/internal/bit_util.h | 28 +++++++++++++------ include/jemalloc/internal/bitmap.h | 16 +++++------ include/jemalloc/internal/prng.h | 6 ++-- src/pages.c | 6 ++-- test/unit/bit_util.c | 54 +++++++++++++++++++++++++++++++++++- 5 files changed, 86 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c045eb8..258fd97 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -11,20 +11,29 @@ # error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif - +/* + * Unlike the builtins and posix ffs functions, our ffs requires a non-zero + * input, and returns the position of the lowest bit set (as opposed to the + * posix versions, which return 1 larger than that position and use a return + * value of zero as a sentinel. This tends to simplify logic in callers, and + * allows for consistency with the builtins we build fls on top of. 
+ */ BIT_UTIL_INLINE unsigned -ffs_llu(unsigned long long bitmap) { - return JEMALLOC_INTERNAL_FFSLL(bitmap); +ffs_llu(unsigned long long x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFSLL(x) - 1; } BIT_UTIL_INLINE unsigned -ffs_lu(unsigned long bitmap) { - return JEMALLOC_INTERNAL_FFSL(bitmap); +ffs_lu(unsigned long x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFSL(x) - 1; } BIT_UTIL_INLINE unsigned -ffs_u(unsigned bitmap) { - return JEMALLOC_INTERNAL_FFS(bitmap); +ffs_u(unsigned x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFS(x) - 1; } #ifdef JEMALLOC_INTERNAL_POPCOUNTL @@ -41,7 +50,8 @@ popcount_lu(unsigned long bitmap) { BIT_UTIL_INLINE size_t cfs_lu(unsigned long* bitmap) { - size_t bit = ffs_lu(*bitmap) - 1; + util_assume(*bitmap != 0); + size_t bit = ffs_lu(*bitmap); *bitmap ^= ZU(1) << bit; return bit; } @@ -209,7 +219,7 @@ lg_floor(size_t x) { return (8 << LG_SIZEOF_PTR) - 1; } x++; - return ffs_zu(x) - 2; + return ffs_zu(x) - 1; } #endif diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index f7152a6..dc19454 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -272,7 +272,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } return bitmap_ffu(bitmap, binfo, sib_base); } - bit += ((size_t)(ffs_lu(group_masked) - 1)) << + bit += ((size_t)ffs_lu(group_masked)) << (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); } assert(bit >= min_bit); @@ -284,9 +284,9 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { - 1); size_t bit; do { - bit = ffs_lu(g); - if (bit != 0) { - return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + if (g != 0) { + bit = ffs_lu(g); + return (i << LG_BITMAP_GROUP_NBITS) + bit; } i++; g = bitmap[i]; @@ -307,20 +307,20 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; + bit = ffs_lu(g); while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); } #else i = 0; g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { + while (g == 0) { i++; g = bitmap[i]; } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); #endif bitmap_set(bitmap, binfo, bit); return bit; diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 15cc2d1..12380b4 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -136,7 +136,7 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + lg_range = ffs_u32(pow2_ceil_u32(range)); /* Generate a result in [0..range) via repeated trial. */ do { @@ -154,7 +154,7 @@ prng_range_u64(uint64_t *state, uint64_t range) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. */ do { @@ -172,7 +172,7 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. 
*/ do { diff --git a/src/pages.c b/src/pages.c index 0ddc5ba..05bbf72 100644 --- a/src/pages.c +++ b/src/pages.c @@ -211,8 +211,8 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) { flags |= MAP_FIXED | MAP_EXCL; } else { unsigned alignment_bits = ffs_zu(alignment); - assert(alignment_bits > 1); - flags |= MAP_ALIGNED(alignment_bits - 1); + assert(alignment_bits > 0); + flags |= MAP_ALIGNED(alignment_bits); } void *ret = mmap(addr, size, prot, flags, -1, 0); @@ -600,7 +600,7 @@ init_thp_state(void) { #endif if (nread < 0) { - goto label_error; + goto label_error; } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 3eeb7a3..f3761fd 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -101,11 +101,63 @@ TEST_BEGIN(test_lg_ceil_floor) { } TEST_END +#define TEST_FFS(t, suf, test_suf, pri) do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(ffs_##suf(x), k, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ + } \ +} while(0) + +TEST_BEGIN(test_ffs_u) { + TEST_FFS(unsigned, u, u,"u"); +} +TEST_END + + +TEST_BEGIN(test_ffs_lu) { + TEST_FFS(unsigned long, lu, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_ffs_llu) { + TEST_FFS(unsigned long long, llu, qd, "llu"); +} +TEST_END + +TEST_BEGIN(test_ffs_u32) { + TEST_FFS(uint32_t, u32, u32, FMTu32); +} +TEST_END + + +TEST_BEGIN(test_ffs_u64) { + TEST_FFS(uint64_t, u64, u64, FMTu64); +} +TEST_END + +TEST_BEGIN(test_ffs_zu) { + TEST_FFS(size_t, zu, zu, "zu"); +} +TEST_END + int main(void) { return test( test_pow2_ceil_u64, test_pow2_ceil_u32, test_pow2_ceil_zu, - test_lg_ceil_floor); + test_lg_ceil_floor, + test_ffs_u, + test_ffs_lu, + test_ffs_llu, + test_ffs_u32, + test_ffs_u64, + test_ffs_zu); } -- cgit v0.12 From 22da836094f315b3fe1609e21c0e1092e7b0f2f5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jul 2020 07:10:06 -0700 Subject: bit_util: Add fls_ functions; "find last set". These simplify a lot of the bit_util module, which had grown bits and pieces of this functionality across a variety of places over the years. While we're here, kill off BIT_UTIL_INLINE and don't do reentrancy testing for bit_util. --- configure.ac | 6 +- include/jemalloc/internal/bit_util.h | 344 ++++++++++++++++++++++------------- test/unit/bit_util.c | 75 +++++++- 3 files changed, 292 insertions(+), 133 deletions(-) diff --git a/configure.ac b/configure.ac index bcd6363..b197d32 100644 --- a/configure.ac +++ b/configure.ac @@ -2118,7 +2118,7 @@ esac fi dnl ============================================================================ -dnl Check for __builtin_clz() and __builtin_clzl(). +dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll(). AC_CACHE_CHECK([for __builtin_clz], [je_cv_builtin_clz], @@ -2132,6 +2132,10 @@ AC_CACHE_CHECK([for __builtin_clz], unsigned long x = 0; int y = __builtin_clzl(x); } + { + unsigned long long x = 0; + int y = __builtin_clzll(x); + } ])], [je_cv_builtin_clz=yes], [je_cv_builtin_clz=no])]) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 258fd97..c5158f6 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -3,8 +3,6 @@ #include "jemalloc/internal/assert.h" -#define BIT_UTIL_INLINE static inline - /* Sanity check. 
*/ #if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ || !defined(JEMALLOC_INTERNAL_FFS) @@ -18,26 +16,171 @@ * value of zero as a sentinel. This tends to simplify logic in callers, and * allows for consistency with the builtins we build fls on top of. */ -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_llu(unsigned long long x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFSLL(x) - 1; } -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_lu(unsigned long x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFSL(x) - 1; } -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_u(unsigned x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFS(x) - 1; } +#define DO_FLS_SLOW(x, suffix) do { \ + util_assume(x != 0); \ + x |= (x >> 1); \ + x |= (x >> 2); \ + x |= (x >> 4); \ + x |= (x >> 8); \ + x |= (x >> 16); \ + if (sizeof(x) > 4) { \ + /* \ + * If sizeof(x) is 4, then the expression "x >> 32" \ + * will generate compiler warnings even if the code \ + * never executes. This circumvents the warning, and \ + * gets compiled out in optimized builds. \ + */ \ + int constant_32 = sizeof(x) * 4; \ + x |= (x >> constant_32); \ + } \ + x++; \ + if (x == 0) { \ + return 8 * sizeof(x) - 1; \ + } \ + return ffs_##suffix(x) - 1; \ +} while(0) + +static inline unsigned +fls_llu_slow(unsigned long long x) { + DO_FLS_SLOW(x, llu); +} + +static inline unsigned +fls_lu_slow(unsigned long x) { + DO_FLS_SLOW(x, lu); +} + +static inline unsigned +fls_u_slow(unsigned x) { + DO_FLS_SLOW(x, u); +} + +#undef DO_FLS_SLOW + +#ifdef JEMALLOC_HAVE_BUILTIN_CLZ +static inline unsigned +fls_llu(unsigned long long x) { + util_assume(x != 0); + /* + * Note that the xor here is more naturally written as subtraction; the + * last bit set is the number of bits in the type minus the number of + * leading zero bits. But GCC implements that as: + * bsr edi, edi + * mov eax, 31 + * xor edi, 31 + * sub eax, edi + * If we write it as xor instead, then we get + * bsr eax, edi + * as desired. + */ + return (8 * sizeof(x) - 1) ^ __builtin_clzll(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + util_assume(x != 0); + return (8 * sizeof(x) - 1) ^ __builtin_clzl(x); +} + +static inline unsigned +fls_u(unsigned x) { + util_assume(x != 0); + return (8 * sizeof(x) - 1) ^ __builtin_clz(x); +} +#elif defined(_MSC_VER) + +#if LG_SIZEOF_PTR == 3 +#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) +#else +/* + * This never actually runs; we're just dodging a compiler error for the + * never-taken branch where sizeof(void *) == 8. + */ +#define DO_BSR64(bit, x) bit = 0; unreachable() +#endif + +#define DO_FLS(x) do { \ + if (x == 0) { \ + return 8 * sizeof(x); \ + } \ + unsigned long bit; \ + if (sizeof(x) == 4) { \ + _BitScanReverse(&bit, (unsigned)x); \ + return (unsigned)bit; \ + } \ + if (sizeof(x) == 8 && sizeof(void *) == 8) { \ + DO_BSR64(bit, x); \ + return (unsigned)bit; \ + } \ + if (sizeof(x) == 8 && sizeof(void *) == 4) { \ + /* Dodge a compiler warning, as above. 
*/ \ + int constant_32 = sizeof(x) * 4; \ + if (_BitScanReverse(&bit, \ + (unsigned)(x >> constant_32))) { \ + return 32 + (unsigned)bit; \ + } else { \ + _BitScanReverse(&bit, (unsigned)x); \ + return (unsigned)bit; \ + } \ + } \ + unreachable(); \ +} while (0) + +static inline unsigned +fls_llu(unsigned long long x) { + DO_FLS(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + DO_FLS(x); +} + +static inline unsigned +fls_u(unsigned x) { + DO_FLS(x); +} + +#undef DO_FLS +#undef DO_BSR64 +#else + +static inline unsigned +fls_llu(unsigned long long x) { + return fls_llu_slow(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + return fls_lu_slow(x); +} + +static inline unsigned +fls_u(unsigned x) { + return fls_u_slow(x); +} +#endif + #ifdef JEMALLOC_INTERNAL_POPCOUNTL -BIT_UTIL_INLINE unsigned +static inline unsigned popcount_lu(unsigned long bitmap) { return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); } @@ -48,7 +191,7 @@ popcount_lu(unsigned long bitmap) { * place of bit. bitmap *must not* be 0. */ -BIT_UTIL_INLINE size_t +static inline size_t cfs_lu(unsigned long* bitmap) { util_assume(*bitmap != 0); size_t bit = ffs_lu(*bitmap); @@ -56,101 +199,102 @@ cfs_lu(unsigned long* bitmap) { return bit; } -BIT_UTIL_INLINE unsigned -ffs_zu(size_t bitmap) { +static inline unsigned +ffs_zu(size_t x) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT - return ffs_u(bitmap); + return ffs_u(x); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return ffs_lu(bitmap); + return ffs_lu(x); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return ffs_llu(bitmap); + return ffs_llu(x); #else #error No implementation for size_t ffs() #endif } -BIT_UTIL_INLINE unsigned -ffs_u64(uint64_t bitmap) { +static inline unsigned +fls_zu(size_t x) { +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return fls_u(x); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return fls_lu(x); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return fls_llu(x); +#else +#error No implementation for size_t fls() +#endif +} + + +static inline unsigned +ffs_u64(uint64_t x) { #if LG_SIZEOF_LONG == 3 - return ffs_lu(bitmap); + return ffs_lu(x); #elif LG_SIZEOF_LONG_LONG == 3 - return ffs_llu(bitmap); + return ffs_llu(x); #else #error No implementation for 64-bit ffs() #endif } -BIT_UTIL_INLINE unsigned -ffs_u32(uint32_t bitmap) { +static inline unsigned +fls_u64(uint64_t x) { +#if LG_SIZEOF_LONG == 3 + return fls_lu(x); +#elif LG_SIZEOF_LONG_LONG == 3 + return fls_llu(x); +#else +#error No implementation for 64-bit fls() +#endif +} + +static inline unsigned +ffs_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 - return ffs_u(bitmap); + return ffs_u(x); #else #error No implementation for 32-bit ffs() #endif - return ffs_u(bitmap); + return ffs_u(x); } -BIT_UTIL_INLINE uint64_t +static inline unsigned +fls_u32(uint32_t x) { +#if LG_SIZEOF_INT == 2 + return fls_u(x); +#else +#error No implementation for 32-bit fls() +#endif + return fls_u(x); +} + +static inline uint64_t pow2_ceil_u64(uint64_t x) { -#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - if(unlikely(x <= 1)) { + if (unlikely(x <= 1)) { return x; } - size_t msb_on_index; -#if (defined(__amd64__) || defined(__x86_64__)) - asm ("bsrq %1, %0" - : "=r"(msb_on_index) // Outputs. - : "r"(x-1) // Inputs. - ); -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - msb_on_index = (63 ^ __builtin_clzll(x - 1)); -#endif + size_t msb_on_index = fls_u64(x - 1); + /* + * Range-check; it's on the callers to ensure that the result of this + * call won't overflow. 
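+ * For example, any x greater than 2^63 has no uint64_t power-of-two
+ * ceiling; the assertion below catches exactly those inputs.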
+ */ assert(msb_on_index < 63); return 1ULL << (msb_on_index + 1); -#else - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - x++; - return x; -#endif } -BIT_UTIL_INLINE uint32_t +static inline uint32_t pow2_ceil_u32(uint32_t x) { -#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__))) - if(unlikely(x <= 1)) { - return x; + if (unlikely(x <= 1)) { + return x; } - size_t msb_on_index; -#if (defined(__i386__)) - asm ("bsr %1, %0" - : "=r"(msb_on_index) // Outputs. - : "r"(x-1) // Inputs. - ); -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - msb_on_index = (31 ^ __builtin_clz(x - 1)); -#endif + size_t msb_on_index = fls_u32(x - 1); + /* As above. */ assert(msb_on_index < 31); return 1U << (msb_on_index + 1); -#else - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return x; -#endif } /* Compute the smallest power of 2 that is >= x. */ -BIT_UTIL_INLINE size_t +static inline size_t pow2_ceil_zu(size_t x) { #if (LG_SIZEOF_PTR == 3) return pow2_ceil_u64(x); @@ -159,77 +303,21 @@ pow2_ceil_zu(size_t x) { #endif } -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - size_t ret; - assert(x != 0); - - asm ("bsr %1, %0" - : "=r"(ret) // Outputs. - : "r"(x) // Inputs. - ); - assert(ret < UINT_MAX); - return (unsigned)ret; -} -#elif (defined(_MSC_VER)) -BIT_UTIL_INLINE unsigned +static inline unsigned lg_floor(size_t x) { - unsigned long ret; - - assert(x != 0); - + util_assume(x != 0); #if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); -#elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); -#else -# error "Unsupported type size for lg_floor()" -#endif - assert(ret < UINT_MAX); - return (unsigned)ret; -} -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - assert(x != 0); - -#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x); -#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x); + return fls_u64(x); #else -# error "Unsupported type size for lg_floor()" -#endif -} -#else -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - assert(x != 0); - - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3) - x |= (x >> 32); + return fls_u32(x); #endif - if (x == SIZE_T_MAX) { - return (8 << LG_SIZEOF_PTR) - 1; - } - x++; - return ffs_zu(x) - 1; } -#endif -BIT_UTIL_INLINE unsigned +static inline unsigned lg_ceil(size_t x) { return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1); } -#undef BIT_UTIL_INLINE - /* A compile-time version of lg_floor and lg_ceil. */ #define LG_FLOOR_1(x) 0 #define LG_FLOOR_2(x) (x < (1ULL << 1) ? 
LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index f3761fd..045cf8b 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -120,7 +120,6 @@ TEST_BEGIN(test_ffs_u) { } TEST_END - TEST_BEGIN(test_ffs_lu) { TEST_FFS(unsigned long, lu, lu, "lu"); } @@ -136,7 +135,6 @@ TEST_BEGIN(test_ffs_u32) { } TEST_END - TEST_BEGIN(test_ffs_u64) { TEST_FFS(uint64_t, u64, u64, FMTu64); } @@ -147,9 +145,69 @@ TEST_BEGIN(test_ffs_zu) { } TEST_END +#define TEST_FLS(t, suf, test_suf, pri) do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(fls_##suf(x), i, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ + } \ +} while(0) + +TEST_BEGIN(test_fls_u) { + TEST_FLS(unsigned, u, u,"u"); +} +TEST_END + +TEST_BEGIN(test_fls_lu) { + TEST_FLS(unsigned long, lu, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_fls_llu) { + TEST_FLS(unsigned long long, llu, qd, "llu"); +} +TEST_END + +TEST_BEGIN(test_fls_u32) { + TEST_FLS(uint32_t, u32, u32, FMTu32); +} +TEST_END + +TEST_BEGIN(test_fls_u64) { + TEST_FLS(uint64_t, u64, u64, FMTu64); +} +TEST_END + +TEST_BEGIN(test_fls_zu) { + TEST_FLS(size_t, zu, zu, "zu"); +} +TEST_END + +TEST_BEGIN(test_fls_u_slow) { + TEST_FLS(unsigned, u_slow, u,"u"); +} +TEST_END + +TEST_BEGIN(test_fls_lu_slow) { + TEST_FLS(unsigned long, lu_slow, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_fls_llu_slow) { + TEST_FLS(unsigned long long, llu_slow, qd, "llu"); +} +TEST_END + + int main(void) { - return test( + return test_no_reentrancy( test_pow2_ceil_u64, test_pow2_ceil_u32, test_pow2_ceil_zu, @@ -159,5 +217,14 @@ main(void) { test_ffs_llu, test_ffs_u32, test_ffs_u64, - test_ffs_zu); + test_ffs_zu, + test_fls_u, + test_fls_lu, + test_fls_llu, + test_fls_u32, + test_fls_u64, + test_fls_zu, + test_fls_u_slow, + test_fls_lu_slow, + test_fls_llu_slow); } -- cgit v0.12 From efeab1f4985281fb7cb12ffd985a84317bfb3332 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 16:12:28 -0700 Subject: bitset test: Pull NBITS_TAB into its own file. --- test/include/test/nbits.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++ test/unit/bitmap.c | 92 +------------------------------------------- 2 files changed, 99 insertions(+), 91 deletions(-) create mode 100644 test/include/test/nbits.h diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h new file mode 100644 index 0000000..be74baa --- /dev/null +++ b/test/include/test/nbits.h @@ -0,0 +1,98 @@ +#ifndef TEST_NBITS_H +#define TEST_NBITS_H + +/* Interesting bitmap counts to test. 
*/ + +#define NBITS_TAB \ + NB( 1) \ + NB( 2) \ + NB( 3) \ + NB( 4) \ + NB( 5) \ + NB( 6) \ + NB( 7) \ + NB( 8) \ + NB( 9) \ + NB(10) \ + NB(11) \ + NB(12) \ + NB(13) \ + NB(14) \ + NB(15) \ + NB(16) \ + NB(17) \ + NB(18) \ + NB(19) \ + NB(20) \ + NB(21) \ + NB(22) \ + NB(23) \ + NB(24) \ + NB(25) \ + NB(26) \ + NB(27) \ + NB(28) \ + NB(29) \ + NB(30) \ + NB(31) \ + NB(32) \ + \ + NB(33) \ + NB(34) \ + NB(35) \ + NB(36) \ + NB(37) \ + NB(38) \ + NB(39) \ + NB(40) \ + NB(41) \ + NB(42) \ + NB(43) \ + NB(44) \ + NB(45) \ + NB(46) \ + NB(47) \ + NB(48) \ + NB(49) \ + NB(50) \ + NB(51) \ + NB(52) \ + NB(53) \ + NB(54) \ + NB(55) \ + NB(56) \ + NB(57) \ + NB(58) \ + NB(59) \ + NB(60) \ + NB(61) \ + NB(62) \ + NB(63) \ + NB(64) \ + NB(65) \ + \ + NB(126) \ + NB(127) \ + NB(128) \ + NB(129) \ + NB(130) \ + \ + NB(254) \ + NB(255) \ + NB(256) \ + NB(257) \ + NB(258) \ + \ + NB(510) \ + NB(511) \ + NB(512) \ + NB(513) \ + NB(514) \ + \ + NB(1024) \ + NB(2048) \ + NB(4096) \ + NB(8192) \ + NB(16384) + +#endif /* TEST_NBITS_H */ diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 6b0ea9e..78e542b 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -1,96 +1,6 @@ #include "test/jemalloc_test.h" -#define NBITS_TAB \ - NB( 1) \ - NB( 2) \ - NB( 3) \ - NB( 4) \ - NB( 5) \ - NB( 6) \ - NB( 7) \ - NB( 8) \ - NB( 9) \ - NB(10) \ - NB(11) \ - NB(12) \ - NB(13) \ - NB(14) \ - NB(15) \ - NB(16) \ - NB(17) \ - NB(18) \ - NB(19) \ - NB(20) \ - NB(21) \ - NB(22) \ - NB(23) \ - NB(24) \ - NB(25) \ - NB(26) \ - NB(27) \ - NB(28) \ - NB(29) \ - NB(30) \ - NB(31) \ - NB(32) \ - \ - NB(33) \ - NB(34) \ - NB(35) \ - NB(36) \ - NB(37) \ - NB(38) \ - NB(39) \ - NB(40) \ - NB(41) \ - NB(42) \ - NB(43) \ - NB(44) \ - NB(45) \ - NB(46) \ - NB(47) \ - NB(48) \ - NB(49) \ - NB(50) \ - NB(51) \ - NB(52) \ - NB(53) \ - NB(54) \ - NB(55) \ - NB(56) \ - NB(57) \ - NB(58) \ - NB(59) \ - NB(60) \ - NB(61) \ - NB(62) \ - NB(63) \ - NB(64) \ - NB(65) \ - \ - NB(126) \ - NB(127) \ - NB(128) \ - NB(129) \ - NB(130) \ - \ - NB(254) \ - NB(255) \ - NB(256) \ - NB(257) \ - NB(258) \ - \ - NB(510) \ - NB(511) \ - NB(512) \ - NB(513) \ - NB(514) \ - \ - NB(1024) \ - NB(2048) \ - NB(4096) \ - NB(8192) \ - NB(16384) \ +#include "test/nbits.h" static void test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { -- cgit v0.12 From 7fde6ac490bd6a257023aafcbedcf422a9413b4f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 16:15:49 -0700 Subject: Nbits: Add a couple more interesting sizes. Previously, all tests with more than two levels came in powers of 2. It's usefule to check cases where we have a partially filled group at above the second level. --- test/include/test/nbits.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h index be74baa..c06cf1b 100644 --- a/test/include/test/nbits.h +++ b/test/include/test/nbits.h @@ -70,6 +70,8 @@ NB(63) \ NB(64) \ NB(65) \ + NB(66) \ + NB(67) \ \ NB(126) \ NB(127) \ @@ -89,9 +91,20 @@ NB(513) \ NB(514) \ \ + NB(1022) \ + NB(1023) \ NB(1024) \ + NB(1025) \ + NB(1026) \ + \ NB(2048) \ + \ + NB(4094) \ + NB(4095) \ NB(4096) \ + NB(4097) \ + NB(4098) \ + \ NB(8192) \ NB(16384) -- cgit v0.12 From ceee823519bb534c2609e1dadd9b923bd28853b4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 18:42:50 -0700 Subject: Add flat_bitmap. The flat_bitmap module offers an extended API, at the cost of decreased performance in the case of very large bitmaps. 
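A rough usage sketch (illustrative only; it assumes the fb_* helpers added by the diff below, built inside the jemalloc tree where the internal headers are available):

    fb_group_t fb[FB_NGROUPS(100)];
    fb_init(fb, 100);                  /* all 100 bits start unset */
    fb_set_range(fb, 100, 10, 5);      /* set bits 10..14 */
    assert(fb_get(fb, 100, 12));
    assert(fb_ffs(fb, 100, 0) == 10);  /* first set bit at index >= 0 */
    assert(fb_fls(fb, 100, 99) == 14); /* last set bit at index <= 99 */
    fb_unset_range(fb, 100, 10, 5);
    assert(fb_empty(fb, 100));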
--- Makefile.in | 1 + include/jemalloc/internal/flat_bitmap.h | 222 ++++++++++++++++++++++ test/unit/flat_bitmap.c | 313 ++++++++++++++++++++++++++++++++ 3 files changed, 536 insertions(+) create mode 100644 include/jemalloc/internal/flat_bitmap.h create mode 100644 test/unit/flat_bitmap.c diff --git a/Makefile.in b/Makefile.in index 2802f7f..10c5392 100644 --- a/Makefile.in +++ b/Makefile.in @@ -203,6 +203,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ + ${srcroot}test/unit/flat_bitmap.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h new file mode 100644 index 0000000..cf2baab --- /dev/null +++ b/include/jemalloc/internal/flat_bitmap.h @@ -0,0 +1,222 @@ +#ifndef JEMALLOC_INTERNAL_FB_H +#define JEMALLOC_INTERNAL_FB_H + +/* + * The flat bitmap module. This has a larger API relative to the bitmap module + * (supporting things like backwards searches, and searching for both set and + * unset bits), at the cost of slower operations for very large bitmaps. + * + * Initialized flat bitmaps start at all-zeros (all bits unset). + */ + +typedef unsigned long fb_group_t; +#define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) +#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ + + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) + +static inline void +fb_init(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + memset(fb, 0, ngroups * sizeof(fb_group_t)); +} + +static inline bool +fb_empty(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + if (fb[i] != 0) { + return false; + } + } + return true; +} + +static inline bool +fb_full(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + size_t trailing_bits = nbits % FB_GROUP_BITS; + size_t limit = (trailing_bits == 0 ? 
ngroups : ngroups - 1); + for (size_t i = 0; i < limit; i++) { + if (fb[i] != ~(fb_group_t)0) { + return false; + } + } + if (trailing_bits == 0) { + return true; + } + return fb[ngroups - 1] == ((fb_group_t)1 << trailing_bits) - 1; +} + +static inline bool +fb_get(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + return (bool)(fb[group_ind] & ((fb_group_t)1 << bit_ind)); +} + +static inline void +fb_set(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] |= ((fb_group_t)1 << bit_ind); +} + +static inline void +fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] &= ~((fb_group_t)1 << bit_ind); +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_group_impl(fb_group_t *fb, size_t start, size_t cnt, bool val) { + assert(cnt > 0); + assert(start + cnt - 1 < FB_GROUP_BITS); + fb_group_t bits = ((~(fb_group_t)0) >> (FB_GROUP_BITS - cnt)) << start; + if (val) { + *fb |= bits; + } else { + *fb &= ~bits; + } +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, + bool val) { + assert(start + cnt - 1 < nbits); + size_t group_ind = start / FB_GROUP_BITS; + size_t start_bit_ind = start % FB_GROUP_BITS; + /* + * The first group is special; it's the only one we don't start writing + * to from bit 0. + */ + size_t first_group_cnt = + (start_bit_ind + cnt > FB_GROUP_BITS + ? FB_GROUP_BITS - start_bit_ind + : cnt); + /* + * We can basically split affected words into: + * - The first group, where we touch only the high bits + * - The last group, where we touch only the low bits + * - The middle, where we set all the bits to the same thing. + * We treat each case individually. The last two could be merged, but + * this can lead to bad codegen for those middle words. + */ + /* First group */ + fb_assign_group_impl(&fb[group_ind], start_bit_ind, first_group_cnt, + val); + cnt -= first_group_cnt; + group_ind++; + /* Middle groups */ + while (cnt > FB_GROUP_BITS) { + fb_assign_group_impl(&fb[group_ind], 0, FB_GROUP_BITS, val); + cnt -= FB_GROUP_BITS; + group_ind++; + } + /* Last group */ + if (cnt != 0) { + fb_assign_group_impl(&fb[group_ind], 0, cnt, val); + } +} + +/* Sets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + fb_assign_impl(fb, nbits, start, cnt, true); +} + +/* Unsets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + fb_assign_impl(fb, nbits, start, cnt, false); +} + +/* + * An implementation detail; find the first bit at position >= min_bit with the + * value val. + * + * Returns the number of bits in the bitmap if no such bit exists. + */ +JEMALLOC_ALWAYS_INLINE ssize_t +fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, + bool forward) { + assert(start < nbits); + size_t ngroups = FB_NGROUPS(nbits); + ssize_t group_ind = start / FB_GROUP_BITS; + size_t bit_ind = start % FB_GROUP_BITS; + + fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); + + fb_group_t group = fb[group_ind]; + group ^= maybe_invert; + if (forward) { + /* Only keep ones in bits bit_ind and above. 
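(For example, bit_ind == 3 gives the mask ~(fb_group_t)0x7, which keeps bits 3 and up.)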
*/ + group &= ~((1LU << bit_ind) - 1); + } else { + /* + * Only keep ones in bits bit_ind and below. You might more + * naturally express this as (1 << (bit_ind + 1)) - 1, but + * that shifts by an invalid amount if bit_ind is one less than + * FB_GROUP_BITS. + */ + group &= ((2LU << bit_ind) - 1); + } + ssize_t group_ind_bound = forward ? (ssize_t)ngroups : -1; + while (group == 0) { + group_ind += forward ? 1 : -1; + if (group_ind == group_ind_bound) { + return forward ? (ssize_t)nbits : (ssize_t)-1; + } + group = fb[group_ind]; + group ^= maybe_invert; + } + assert(group != 0); + size_t bit = forward ? ffs_lu(group) : fls_lu(group); + size_t pos = group_ind * FB_GROUP_BITS + bit; + /* + * The high bits of a partially filled last group are zeros, so if we're + * looking for zeros we don't want to report an invalid result. + */ + if (forward && !val && pos > nbits) { + return nbits; + } + return pos; +} + +/* + * Find the first set bit in the bitmap with an index >= min_bit. Returns the + * number of bits in the bitmap if no such bit exists. + */ +static inline size_t +fb_ffu(fb_group_t *fb, size_t nbits, size_t min_bit) { + return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ false, + /* forward */ true); +} + +/* The same, but looks for an unset bit. */ +static inline size_t +fb_ffs(fb_group_t *fb, size_t nbits, size_t min_bit) { + return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ true, + /* forward */ true); +} + +/* + * Find the last set bit in the bitmap with an index <= max_bit. Returns -1 if + * no such bit exists. + */ +static inline ssize_t +fb_flu(fb_group_t *fb, size_t nbits, size_t max_bit) { + return fb_find_impl(fb, nbits, max_bit, /* val */ false, + /* forward */ false); +} + +static inline ssize_t +fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) { + return fb_find_impl(fb, nbits, max_bit, /* val */ true, + /* forward */ false); +} + +#endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c new file mode 100644 index 0000000..1667f77 --- /dev/null +++ b/test/unit/flat_bitmap.c @@ -0,0 +1,313 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/flat_bitmap.h" +#include "test/nbits.h" + +static void +do_test_init(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + /* Junk fb's contents. */ + memset(fb, 99, sz); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + expect_false(fb_get(fb, nbits, i), + "bitmap should start empty"); + } + free(fb); +} + +TEST_BEGIN(test_fb_init) { +#define NB(nbits) \ + do_test_init(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +do_test_get_set_unset(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + /* Set the bits divisible by 3. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + } + /* Check them. */ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + /* Unset those divisible by 5. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 5 == 0) { + fb_unset(fb, nbits, i); + } + } + /* Check them. 
*/ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0 && i % 5 != 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + free(fb); +} + +TEST_BEGIN(test_get_set_unset) { +#define NB(nbits) \ + do_test_get_set_unset(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static ssize_t +find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { + for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { + bool expected_bit = i % 3 == 0 || i % 5 == 0; + if (expected_bit == bit) { + return i; + } + } + return forward ? (ssize_t)nbits : (ssize_t)-1; +} + +static void +do_test_search_simple(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + /* We pick multiples of 3 or 5. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + /* This tests double-setting a little, too. */ + if (i % 5 == 0) { + fb_set(fb, nbits, i); + } + } + for (size_t i = 0; i < nbits; i++) { + size_t ffs_compute = find_3_5_compute(i, nbits, true, true); + size_t ffs_search = fb_ffs(fb, nbits, i); + expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); + + ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); + size_t fls_search = fb_fls(fb, nbits, i); + expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); + + size_t ffu_compute = find_3_5_compute(i, nbits, false, true); + size_t ffu_search = fb_ffu(fb, nbits, i); + expect_zu_eq(ffu_compute, ffu_search, "ffu mismatch at %zu", i); + + size_t flu_compute = find_3_5_compute(i, nbits, false, false); + size_t flu_search = fb_flu(fb, nbits, i); + expect_zu_eq(flu_compute, flu_search, "flu mismatch at %zu", i); + } + + free(fb); +} + +TEST_BEGIN(test_search_simple) { +#define NB(nbits) \ + do_test_search_simple(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, + size_t nbits, size_t special_bit, size_t position) { + if (position < special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(special_bit, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } else if (position == special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, 
%zu", position, special_bit); + expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } else { + /* position > special_bit. */ + expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(nbits, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } +} + +static void +do_test_search_exhaustive(size_t nbits) { + /* This test is quadratic; let's not get too big. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + for (size_t j = 0; j < nbits; j++) { + expect_exhaustive_results(full, empty, nbits, i, j); + } + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + +TEST_BEGIN(test_search_exhaustive) { +#define NB(nbits) \ + do_test_search_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +TEST_BEGIN(test_range_simple) { + /* + * Just pick a constant big enough to have nontrivial middle sizes, and + * big enough that usages of things like weirdnum (below) near the + * beginning fit comfortably into the beginning of the bitmap. 
+ */ + size_t nbits = 64 * 10; + size_t ngroups = FB_NGROUPS(nbits); + fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set_range(fb, nbits, i, 1); + } + } + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 2 == 0, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + fb_set_range(fb, nbits, 0, nbits / 2); + fb_unset_range(fb, nbits, nbits / 2, nbits / 2); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i < nbits / 2, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + + static const size_t weirdnum = 7; + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, weirdnum, FB_GROUP_BITS + weirdnum); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(7 <= i && i <= 2 * weirdnum + FB_GROUP_BITS - 1, + !fb_get(fb, nbits, i), "mismatch at position %zu", i); + } + free(fb); +} +TEST_END + +static void +do_test_empty_full_exhaustive(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + expect_true(fb_full(full, nbits), ""); + expect_false(fb_empty(full, nbits), ""); + expect_false(fb_full(empty, nbits), ""); + expect_true(fb_empty(empty, nbits), ""); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + expect_false(fb_empty(empty, nbits), "error at bit %zu", i); + if (nbits != 1) { + expect_false(fb_full(empty, nbits), + "error at bit %zu", i); + expect_false(fb_empty(full, nbits), + "error at bit %zu", i); + } else { + expect_true(fb_full(empty, nbits), + "error at bit %zu", i); + expect_true(fb_empty(full, nbits), + "error at bit %zu", i); + } + expect_false(fb_full(full, nbits), "error at bit %zu", i); + + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + +TEST_BEGIN(test_empty_full) { +#define NB(nbits) \ + do_test_empty_full_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_fb_init, + test_get_set_unset, + test_search_simple, + test_search_exhaustive, + test_range_simple, + test_empty_full); +} -- cgit v0.12 From ddb8dc4ad0523e07ab0475d6c9583d8ca27de8dc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jul 2020 12:26:06 -0700 Subject: FB: Add range iteration support. --- include/jemalloc/internal/flat_bitmap.h | 65 +++++++++ test/unit/flat_bitmap.c | 234 +++++++++++++++++++++++++++++++- 2 files changed, 298 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index cf2baab..7b894d5 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -219,4 +219,69 @@ fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) { /* forward */ false); } +/* Returns whether or not we found a range. */ +JEMALLOC_ALWAYS_INLINE bool +fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + assert(start < nbits); + ssize_t next_range_begin = fb_find_impl(fb, nbits, start, val, forward); + if ((forward && next_range_begin == (ssize_t)nbits) + || (!forward && next_range_begin == (ssize_t)-1)) { + return false; + } + /* Half open range; the set bits are [begin, end). 
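(For example, with val == true and only bits 4..6 set, a forward search from 0 yields next_range_begin == 4 and next_range_end == 7, i.e. a range of length 3.)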
*/ + ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, + forward); + if (forward) { + *r_begin = next_range_begin; + *r_len = next_range_end - next_range_begin; + } else { + *r_begin = next_range_end + 1; + *r_len = next_range_begin - next_range_end; + } + return true; +} + +/* + * Used to iterate through ranges of set bits. + * + * Tries to find the next contiguous sequence of set bits with a first index >= + * start. If one exists, puts the earliest bit of the range in *r_begin, its + * length in *r_len, and returns true. Otherwise, returns false (without + * touching *r_begin or *r_end). + */ +static inline bool +fb_srange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ true); +} + +/* + * The same as fb_srange_iter, but searches backwards from start rather than + * forwards. (The position returned is still the earliest bit in the range). + */ +static inline bool +fb_srange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ false); +} + +/* Similar to fb_srange_iter, but searches for unset bits. */ +static inline bool +fb_urange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ true); +} + +/* Similar to fb_srange_riter, but searches for unset bits. */ +static inline bool +fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ false); +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 1667f77..14ac6ba 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -301,6 +301,236 @@ TEST_BEGIN(test_empty_full) { } TEST_END +TEST_BEGIN(test_iter_range_simple) { + size_t set_limit = 30; + size_t nbits = 100; + fb_group_t fb[FB_NGROUPS(100)]; + + fb_init(fb, nbits); + + /* + * Failing to initialize these can lead to build failures with -Wall; + * the compiler can't prove that they're set. + */ + size_t begin = (size_t)-1; + size_t len = (size_t)-1; + bool result; + + /* A set of checks with only the first set_limit bits *set*. 
*/ + fb_set_range(fb, nbits, 0, set_limit); + for (size_t i = 0; i < set_limit; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + } + for (size_t i = set_limit; i < nbits; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); + } + + /* A set of checks with only the first set_limit bits *unset*. 
*/ + fb_unset_range(fb, nbits, 0, set_limit); + fb_set_range(fb, nbits, set_limit, nbits - set_limit); + for (size_t i = 0; i < set_limit; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should not have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); + } + for (size_t i = set_limit; i < nbits; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); + } + +} +TEST_END + +/* + * Doing this bit-by-bit is too slow for a real implementation, but for testing + * code, it's easy to get right. In the exhaustive tests, we'll compare the + * (fast but tricky) real implementation against the (slow but simple) testing + * one. + */ +static bool +fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + ssize_t stride = (forward ? 
(ssize_t)1 : (ssize_t)-1); + ssize_t range_begin = (ssize_t)start; + for (; range_begin != (ssize_t)nbits && range_begin != -1; + range_begin += stride) { + if (fb_get(fb, nbits, range_begin) == val) { + ssize_t range_end = range_begin; + for (; range_end != (ssize_t)nbits && range_end != -1; + range_end += stride) { + if (fb_get(fb, nbits, range_end) != val) { + break; + } + } + if (forward) { + *r_begin = range_begin; + *r_len = range_end - range_begin; + } else { + *r_begin = range_end + 1; + *r_len = range_begin - range_end; + } + return true; + } + } + return false; +} + +static void +expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, + bool val, bool forward) { + bool iter_res; + size_t iter_begin; + size_t iter_len; + if (val) { + if (forward) { + iter_res = fb_srange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_srange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } else { + if (forward) { + iter_res = fb_urange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_urange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } + + bool simple_iter_res; + size_t simple_iter_begin; + size_t simple_iter_len; + simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, + &simple_iter_len, val, forward); + + expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); + if (iter_res && simple_iter_res) { + assert_zu_eq(iter_begin, simple_iter_begin, + "Begin mismatch at %zu", pos); + expect_zu_eq(iter_len, simple_iter_len, + "Length mismatch at %zu", pos); + } +} + +static void +expect_iter_results(fb_group_t *fb, size_t nbits) { + for (size_t i = 0; i < nbits; i++) { + expect_iter_results_at(fb, nbits, i, false, false); + expect_iter_results_at(fb, nbits, i, false, true); + expect_iter_results_at(fb, nbits, i, true, false); + expect_iter_results_at(fb, nbits, i, true, true); + } +} + +static void +set_pattern_3(fb_group_t *fb, size_t nbits, bool zero_val) { + for (size_t i = 0; i < nbits; i++) { + if ((i % 6 < 3 && zero_val) || (i % 6 >= 3 && !zero_val)) { + fb_set(fb, nbits, i); + } else { + fb_unset(fb, nbits, i); + } + } +} + +static void +do_test_iter_range_exhaustive(size_t nbits) { + /* This test is also pretty slow. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ true); + expect_iter_results(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ false); + expect_iter_results(fb, nbits); + + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); + expect_iter_results(fb, nbits); + + fb_unset_range(fb, nbits, 0, nbits); + fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 
1: nbits / 2); + expect_iter_results(fb, nbits); + + free(fb); +} + +TEST_BEGIN(test_iter_range_exhaustive) { +#define NB(nbits) \ + do_test_iter_range_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -309,5 +539,7 @@ main(void) { test_search_simple, test_search_exhaustive, test_range_simple, - test_empty_full); + test_empty_full, + test_iter_range_simple, + test_iter_range_exhaustive); } -- cgit v0.12 From f28cc2bc87199e031b9d035ccdff6a2d429274c9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Apr 2020 17:22:43 -0700 Subject: Extract bin shard selection out of bin locking --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 32 ++++++++++++++++--------------- src/tcache.c | 11 +++++------ 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 674c98f..c600d10 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -85,7 +85,7 @@ arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, +bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 46da385..1df276b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -739,21 +739,20 @@ arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, return (bin->slabcur == NULL); } -/* Choose a bin shard and return the locked bin. */ bin_t * -arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, - unsigned *binshard) { - bin_t *bin; +arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p) { + unsigned binshard; if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { - *binshard = 0; + binshard = 0; } else { - *binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; } - assert(*binshard < bin_infos[binind].n_shards); - bin = &arena->bins[binind].bin_shards[*binshard]; - malloc_mutex_lock(tsdn, &bin->lock); - - return bin; + assert(binshard < bin_infos[binind].n_shards); + if (binshard_p != NULL) { + *binshard_p = binshard; + } + return &arena->bins[binind].bin_shards[binshard]; } void @@ -797,11 +796,12 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, edata_t *fresh_slab = NULL; bool alloc_and_retry = false; unsigned filled = 0; - - bin_t *bin; unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + label_refill: - bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + malloc_mutex_lock(tsdn, &bin->lock); + while (filled < nfill) { /* Try batch-fill from slabcur first. 
*/ edata_t *slabcur = bin->slabcur; @@ -854,6 +854,7 @@ label_refill: bin->stats.nfills++; cache_bin->tstats.nrequests = 0; } + malloc_mutex_unlock(tsdn, &bin->lock); if (alloc_and_retry) { @@ -906,8 +907,9 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { const bin_info_t *bin_info = &bin_infos[binind]; size_t usize = sz_index2size(binind); unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { diff --git a/src/tcache.c b/src/tcache.c index a33d9e2..b681ee1 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -454,9 +454,9 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * thread's arena, so the stats didn't get merged. * Manually do so now. */ - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, - binind, &binshard); + bin_t *bin = arena_bin_choose(tsdn, tcache_arena, + binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; @@ -751,9 +751,8 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { for (unsigned i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, - &binshard); + bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += cache_bin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); } else { -- cgit v0.12 From 2bb8060d572311e4a42a35fb52e78f78e42725ee Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 16:44:18 -0700 Subject: Add empty test and concat for typed list --- include/jemalloc/internal/typed_list.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 7ad2237..6535055 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -42,6 +42,14 @@ list_type##_replace(list_type##_t *list, el_type *to_remove, \ static inline void \ list_type##_remove(list_type##_t *list, el_type *item) { \ ql_remove(&list->head, item, linkage); \ +} \ +static inline bool \ +list_type##_empty(list_type##_t *list) { \ + return ql_empty(&list->head); \ +} \ +static inline void \ +list_type##_concat(list_type##_t *list_a, list_type##_t *list_b) { \ + ql_concat(&list_a->head, &list_b->head, linkage); \ } #endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ -- cgit v0.12 From 49e5c2fe7d35ffdeb2dc767ab7d3c569eb5c6a40 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Apr 2020 18:13:06 -0700 Subject: Add batch allocation from fresh slabs --- include/jemalloc/internal/arena_externs.h | 2 + src/arena.c | 62 +++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c600d10..8134f24 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -87,6 +87,8 @@ bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); +size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, 
szind_t binind, + void **ptrs, size_t nfill); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 1df276b..0a5c60b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -883,6 +883,68 @@ label_refill: arena_decay_tick(tsdn, arena); } +size_t +arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, + void **ptrs, size_t nfill) { + assert(binind < SC_NBINS); + const bin_info_t *bin_info = &bin_infos[binind]; + const size_t nregs = bin_info->nregs; + assert(nregs > 0); + const bool manual_arena = !arena_is_auto(arena); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + + size_t nslab = 0; + size_t filled = 0; + edata_t *slab = NULL; + edata_list_active_t fulls; + edata_list_active_init(&fulls); + + while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind, + binshard, bin_info)) != NULL) { + assert((size_t)edata_nfree_get(slab) == nregs); + ++nslab; + size_t batch = nfill - filled; + if (batch > nregs) { + batch = nregs; + } + assert(batch > 0); + arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, + &ptrs[filled]); + filled += batch; + if (batch == nregs) { + if (manual_arena) { + edata_list_active_append(&fulls, slab); + } + slab = NULL; + } + } + + malloc_mutex_lock(tsdn, &bin->lock); + /* + * Only the last slab can be non-empty, and the last slab is non-empty + * iff slab != NULL. + */ + if (slab != NULL) { + arena_bin_lower_slab(tsdn, arena, slab, bin); + } + if (manual_arena) { + edata_list_active_concat(&bin->slabs_full, &fulls); + } + assert(edata_list_active_empty(&fulls)); + if (config_stats) { + bin->stats.nslabs += nslab; + bin->stats.curslabs += nslab; + bin->stats.nmalloc += filled; + bin->stats.nrequests += filled; + bin->stats.curregs += filled; + } + malloc_mutex_unlock(tsdn, &bin->lock); + + arena_decay_tick(tsdn, arena); + return filled; +} + /* * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill * bin->slabcur if necessary. 
-- cgit v0.12 From f805468957343e0fb02c84c0548eb39f98b9e29c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 17:01:44 -0700 Subject: Add zero option to arena batch allocation --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8134f24..a2fdff9 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -88,7 +88,7 @@ arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, - void **ptrs, size_t nfill); + void **ptrs, size_t nfill, bool zero); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 0a5c60b..b2feff4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -885,11 +885,13 @@ label_refill: size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, - void **ptrs, size_t nfill) { + void **ptrs, size_t nfill, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; const size_t nregs = bin_info->nregs; assert(nregs > 0); + const size_t usize = bin_info->reg_size; + const bool manual_arena = !arena_is_auto(arena); unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); @@ -911,6 +913,10 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, assert(batch > 0); arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, &ptrs[filled]); + assert(edata_addr_get(slab) == ptrs[filled]); + if (zero) { + memset(ptrs[filled], 0, batch * usize); + } filled += batch; if (batch == nregs) { if (manual_arena) { -- cgit v0.12 From c6f59e9bb450bbce279f256ed56c0780092473c4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 12 Mar 2020 15:24:36 -0700 Subject: Add surplus reading API for thread event lookahead --- include/jemalloc/internal/thread_event.h | 50 ++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index bca8a44..5925563 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -226,12 +226,56 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. + * + * Surplus is a terminology referring to the amount of bytes beyond what's + * needed for triggering an event, which can be a useful quantity to have in + * general when lookahead is being called. 
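+ *
+ * As an illustrative example (made-up numbers): if thread_allocated is
+ * 10000, thread_allocated_last_event is 8000, prof_sample_event_wait is
+ * 5000, and usize is 4096, then the lookahead computes 10000 + 4096 - 8000
+ * = 6096 >= 5000, so the event would trigger, with a surplus of
+ * 6096 - 5000 = 1096 (less than usize, as asserted below).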
+ */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, + size_t *surplus) { + if (surplus != NULL) { + /* + * This is a dead store: the surplus will be overwritten before + * any read. The initialization suppresses compiler warnings. + * Meanwhile, using SIZE_MAX to initialize is good for + * debugging purpose, because a valid surplus value is strictly + * less than usize, which is at most SIZE_MAX. + */ + *surplus = SIZE_MAX; + } + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd); + uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); + if (accumbytes < sample_wait) { + return false; + } + assert(accumbytes - sample_wait < (uint64_t)usize); + if (surplus != NULL) { + *surplus = (size_t)(accumbytes - sample_wait); + } + return true; +} + JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { assert(usize == sz_s2u(usize)); - return tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd) >= - tsd_prof_sample_event_wait_get(tsd); + return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); } JEMALLOC_ALWAYS_INLINE void -- cgit v0.12 From 978f830ee300c15460085bdc49b4bdb9ef1a16d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 23 Apr 2020 15:46:45 -0700 Subject: Add batch allocation API --- Makefile.in | 4 +- .../jemalloc/internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/prof_inlines.h | 11 ++ include/jemalloc/internal/thread_event.h | 1 - src/jemalloc.c | 117 +++++++++++++ test/unit/batch_alloc.c | 190 +++++++++++++++++++++ test/unit/batch_alloc.sh | 3 + test/unit/batch_alloc_prof.c | 1 + test/unit/batch_alloc_prof.sh | 3 + 9 files changed, 329 insertions(+), 2 deletions(-) create mode 100644 test/unit/batch_alloc.c create mode 100644 test/unit/batch_alloc.sh create mode 100644 test/unit/batch_alloc_prof.c create mode 100644 test/unit/batch_alloc_prof.sh diff --git a/Makefile.in b/Makefile.in index 10c5392..da094f0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -191,6 +191,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ + $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ @@ -264,7 +265,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ - $(srcroot)test/unit/arena_reset_prof.c + $(srcroot)test/unit/arena_reset_prof.c \ + $(srcroot)test/unit/batch_alloc_prof.c endif TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3dea1e2..3e7124d 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -54,6 +54,7 @@ void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); +size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); diff --git 
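+ * For example (illustrative numbers): with usize = 100 and surplus = 250,
+ * the batch shrinks by 250 / 100 + 1 = 3, which lowers the lookahead total
+ * by 300 > 250 bytes, so the shrunken batch stays below the sampling
+ * threshold and the sample instead lands on the single allocation handled
+ * separately below.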
a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index d8f401d..3d0bd14 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -229,6 +229,17 @@ prof_sample_aligned(const void *ptr) { return ((uintptr_t)ptr & PAGE_MASK) == 0; } +JEMALLOC_ALWAYS_INLINE bool +prof_sampled(tsd_t *tsd, const void *ptr) { + prof_info_t prof_info; + prof_info_get(tsd, ptr, NULL, &prof_info); + bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U; + if (sampled) { + assert(prof_sample_aligned(ptr)); + } + return sampled; +} + JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, emap_alloc_ctx_t *alloc_ctx) { diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 5925563..525019b 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -274,7 +274,6 @@ te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - assert(usize == sz_s2u(usize)); return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9b5ce68..f2e5f8e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3916,6 +3916,123 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { return ret; } +static void +batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { + assert(config_prof && opt_prof); + bool prof_sample_event = te_prof_sample_event_lookahead(tsd, + batch * usize); + assert(!prof_sample_event); + size_t surplus; + prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd, + (batch + 1) * usize, &surplus); + assert(prof_sample_event); + assert(surplus < usize); +} + +size_t +batch_alloc(void **ptrs, size_t num, size_t size, int flags) { + LOG("core.batch_alloc.entry", + "ptrs: %p, num: %zu, size: %zu, flags: %d", ptrs, num, size, flags); + + tsd_t *tsd = tsd_fetch(); + check_entry_exit_locking(tsd_tsdn(tsd)); + + size_t filled = 0; + + if (unlikely(tsd == NULL || tsd_reentrancy_level_get(tsd) > 0)) { + goto label_done; + } + + size_t alignment = MALLOCX_ALIGN_GET(flags); + size_t usize; + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { + goto label_done; + } + + szind_t ind = sz_size2index(usize); + if (unlikely(ind >= SC_NBINS)) { + /* No optimization for large sizes. */ + void *p; + while (filled < num && (p = je_mallocx(size, flags)) != NULL) { + ptrs[filled++] = p; + } + goto label_done; + } + + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + + unsigned arena_ind = mallocx_arena_get(flags); + arena_t *arena; + if (arena_get_from_ind(tsd, arena_ind, &arena)) { + goto label_done; + } + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } else { + /* When a manual arena is specified, bypass the tcache. */ + flags |= MALLOCX_TCACHE_NONE; + } + if (unlikely(arena == NULL)) { + goto label_done; + } + + while (filled < num) { + size_t batch = num - filled; + size_t surplus = SIZE_MAX; /* Dead store. */ + bool prof_sample_event = config_prof && opt_prof + && te_prof_sample_event_lookahead_surplus(tsd, + batch * usize, &surplus); + + if (prof_sample_event) { + /* + * Adjust so that the batch does not trigger prof + * sampling. 
+ */ + batch -= surplus / usize + 1; + batch_alloc_prof_sample_assert(tsd, batch, usize); + } + + size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, + ind, ptrs + filled, batch, zero); + filled += n; + + /* + * For thread events other than prof sampling, trigger them as + * if there's a single allocation of size (n * usize). This is + * fine because: + * (a) these events do not alter the allocation itself, and + * (b) it's possible that some event would have been triggered + * multiple times, instead of only once, if the allocations + * were handled individually, but it would do no harm (or + * even be beneficial) to coalesce the triggerings. + */ + thread_alloc_event(tsd, n * usize); + + if (n < batch) { /* OOM */ + break; + } + + if (prof_sample_event) { + /* + * The next allocation will be prof sampled. The + * thread event logic is handled within the mallocx() + * call. + */ + void *p = je_mallocx(size, flags); + if (p == NULL) { /* OOM */ + break; + } + assert(prof_sampled(tsd, p)); + ptrs[filled++] = p; + } + } + +label_done: + check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.batch_alloc.exit", "result: %zu", filled); + return filled; +} + /* * End non-standard functions. */ diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c new file mode 100644 index 0000000..66e0565 --- /dev/null +++ b/test/unit/batch_alloc.c @@ -0,0 +1,190 @@ +#include "test/jemalloc_test.h" + +#define BATCH_MAX ((1U << 16) + 1024) +static void *ptrs[BATCH_MAX]; + +#define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) + +static void +verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, + unsigned nregs) { + if (!config_stats) { + return; + } + if (config_prof && opt_prof) { + /* + * Checking the stats when prof is on is feasible but + * complicated, while checking the non-prof case suffices for + * unit-test purpose. 
+ */ + return; + } + expect_u64_eq(before->nmalloc + batch, after->nmalloc, ""); + expect_u64_eq(before->nrequests + batch, after->nrequests, ""); + expect_zu_eq(before->curregs + batch, after->curregs, ""); + size_t nslab = batch / nregs; + size_t n_nonfull = 0; + if (batch % nregs != 0) { + ++nslab; + ++n_nonfull; + } + expect_u64_eq(before->nslabs + nslab, after->nslabs, ""); + expect_zu_eq(before->curslabs + nslab, after->curslabs, ""); + expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs, + ""); +} + +static void +verify_batch(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero, + arena_t *arena, unsigned nregs) { + for (size_t i = 0, j = 0; i < batch; ++i, ++j) { + if (j == nregs) { + j = 0; + } + void *p = ptrs[i]; + expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); + expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); + if (zero) { + for (size_t k = 0; k < usize; ++k) { + expect_true(*((unsigned char *)p + k) == 0, ""); + } + } + if (j == 0) { + expect_true(PAGE_ALIGNED(p), ""); + continue; + } + assert(i > 0); + void *q = ptrs[i - 1]; + bool adjacent = (uintptr_t)p > (uintptr_t)q + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize; + if (config_prof && opt_prof) { + if (adjacent) { + expect_false(prof_sampled(tsd, p) + || prof_sampled(tsd, q), ""); + } else { + expect_true(prof_sampled(tsd, p) + || prof_sampled(tsd, q), ""); + expect_true(PAGE_ALIGNED(p), ""); + j = 0; + } + } else { + expect_true(adjacent, ""); + } + } +} + +static void +release_batch(void **ptrs, size_t batch, size_t size) { + for (size_t i = 0; i < batch; ++i) { + sdallocx(ptrs[i], size, 0); + } +} + +static void +test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { + tsd_t *tsd = tsd_fetch(); + assert(tsd != NULL); + const size_t usize = + (alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size)); + const szind_t ind = sz_size2index(usize); + const bin_info_t *bin_info = &bin_infos[ind]; + const unsigned nregs = bin_info->nregs; + assert(nregs > 0); + arena_t *arena; + if (arena_flag != 0) { + arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), + false); + } else { + arena = arena_choose(tsd, NULL); + } + assert(arena != NULL); + bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL); + assert(bin != NULL); + int flags = arena_flag; + if (alignment != 0) { + flags |= MALLOCX_ALIGN(alignment); + } + if (zero) { + flags |= MALLOCX_ZERO; + } + + /* + * Allocate for the purpose of bootstrapping arena_tdata, so that the + * change in bin stats won't contaminate the stats to be verified below. 
+ */ + void *p = mallocx(size, flags | MALLOCX_TCACHE_NONE); + + for (size_t i = 0; i < 4; ++i) { + size_t base = 0; + if (i == 1) { + base = nregs; + } else if (i == 2) { + base = nregs * 2; + } else if (i == 3) { + base = (1 << 16); + } + for (int j = -1; j <= 1; ++j) { + if (base == 0 && j == -1) { + continue; + } + size_t batch = base + (size_t)j; + assert(batch < BATCH_MAX); + bin_stats_t stats_before, stats_after; + memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); + size_t filled = batch_alloc(ptrs, batch, size, flags); + assert_zu_eq(filled, batch, ""); + memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); + verify_stats(&stats_before, &stats_after, batch, nregs); + verify_batch(tsd, ptrs, batch, usize, zero, arena, + nregs); + release_batch(ptrs, batch, usize); + } + } + + free(p); +} + +TEST_BEGIN(test_batch_alloc) { + test_wrapper(11, 0, false, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_zero) { + test_wrapper(11, 0, true, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_aligned) { + test_wrapper(7, 16, false, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_manual_arena) { + unsigned arena_ind; + size_t len_unsigned = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, + 0), 0, ""); + test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind)); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_fallback) { + const size_t size = SC_LARGE_MINCLASS; + for (size_t batch = 0; batch < 4; ++batch) { + assert(batch < BATCH_MAX); + size_t filled = batch_alloc(ptrs, batch, size, 0); + assert_zu_eq(filled, batch, ""); + release_batch(ptrs, batch, size); + } +} +TEST_END + +int +main(void) { + return test( + test_batch_alloc, + test_batch_alloc_zero, + test_batch_alloc_aligned, + test_batch_alloc_manual_arena, + test_batch_alloc_fallback); +} diff --git a/test/unit/batch_alloc.sh b/test/unit/batch_alloc.sh new file mode 100644 index 0000000..9d81010 --- /dev/null +++ b/test/unit/batch_alloc.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="tcache_gc_incr_bytes:2147483648" diff --git a/test/unit/batch_alloc_prof.c b/test/unit/batch_alloc_prof.c new file mode 100644 index 0000000..ef64458 --- /dev/null +++ b/test/unit/batch_alloc_prof.c @@ -0,0 +1 @@ +#include "batch_alloc.c" diff --git a/test/unit/batch_alloc_prof.sh b/test/unit/batch_alloc_prof.sh new file mode 100644 index 0000000..a2697a6 --- /dev/null +++ b/test/unit/batch_alloc_prof.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="prof:true,lg_prof_sample:14" -- cgit v0.12 From f6cf5eb388eefd1c48c04d6b8c550105b2ad8c17 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Jul 2020 10:42:07 -0700 Subject: Add mallctl for batch allocation API --- src/ctl.c | 32 +++++++++++++++++++++++++++++++- test/unit/batch_alloc.c | 21 ++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 9cfb258..aec3473 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -254,6 +254,7 @@ CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) CTL_PROTO(experimental_prof_recent_alloc_dump) +CTL_PROTO(experimental_batch_alloc) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -675,7 +676,8 @@ static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, - {NAME("prof_recent"), CHILD(named, experimental_prof_recent)} + 
{NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, + {NAME("batch_alloc"), CTL(experimental_batch_alloc)} }; static const ctl_named_node_t root_node[] = { @@ -3637,3 +3639,31 @@ experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int flags; +}; + +static int +experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + VERIFY_READ(size_t); + + batch_alloc_packet_t batch_alloc_packet; + ASSURED_WRITE(batch_alloc_packet, batch_alloc_packet_t); + size_t filled = batch_alloc(batch_alloc_packet.ptrs, + batch_alloc_packet.num, batch_alloc_packet.size, + batch_alloc_packet.flags); + READ(filled, size_t); + + ret = 0; + +label_return: + return ret; +} diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 66e0565..08d6f66 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -80,6 +80,24 @@ release_batch(void **ptrs, size_t batch, size_t size) { } } +typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int flags; +}; + +static size_t +batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) { + batch_alloc_packet_t batch_alloc_packet = {ptrs, num, size, flags}; + size_t filled; + size_t len = sizeof(size_t); + assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + return filled; +} + static void test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { tsd_t *tsd = tsd_fetch(); @@ -131,7 +149,8 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { assert(batch < BATCH_MAX); bin_stats_t stats_before, stats_after; memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); - size_t filled = batch_alloc(ptrs, batch, size, flags); + size_t filled = batch_alloc_wrapper(ptrs, batch, size, + flags); assert_zu_eq(filled, batch, ""); memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); verify_stats(&stats_before, &stats_after, batch, nregs); -- cgit v0.12 From e032a1a1de75cf7faf087406a21789ced2b2f650 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 31 Jul 2020 15:56:38 -0700 Subject: Add a stress test for batch allocation --- Makefile.in | 3 +- test/stress/batch_alloc.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 test/stress/batch_alloc.c diff --git a/Makefile.in b/Makefile.in index da094f0..506d9da 100644 --- a/Makefile.in +++ b/Makefile.in @@ -298,7 +298,8 @@ TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c + $(srcroot)test/stress/hookbench.c \ + $(srcroot)test/stress/batch_alloc.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c new file mode 100644 index 0000000..b203e05 --- /dev/null +++ b/test/stress/batch_alloc.c @@ -0,0 +1,88 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +#define BATCH (1000 * 1000) +#define HUGE_BATCH (100 * BATCH) +static void *batch_ptrs[HUGE_BATCH]; +static void *item_ptrs[HUGE_BATCH]; + +#define SIZE 7 + 
+typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int flags; +}; + +static void +batch_alloc_wrapper(size_t batch) { + batch_alloc_packet_t batch_alloc_packet = {batch_ptrs, batch, SIZE, 0}; + size_t filled; + size_t len = sizeof(size_t); + assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + assert_zu_eq(filled, batch, ""); +} + +static void +item_alloc_wrapper(size_t batch) { + for (size_t i = 0; i < batch; ++i) { + item_ptrs[i] = malloc(SIZE); + } +} + +static void +release_and_clear(void **ptrs, size_t batch) { + for (size_t i = 0; i < batch; ++i) { + void *p = ptrs[i]; + assert_ptr_not_null(p, "allocation failed"); + sdallocx(p, SIZE, 0); + ptrs[i] = NULL; + } +} + +static void +batch_alloc_small_can_repeat() { + batch_alloc_wrapper(BATCH); + release_and_clear(batch_ptrs, BATCH); +} + +static void +item_alloc_small_can_repeat() { + item_alloc_wrapper(BATCH); + release_and_clear(item_ptrs, BATCH); +} + +TEST_BEGIN(test_small_batch_with_free) { + compare_funcs(10, 100, + "batch allocation", batch_alloc_small_can_repeat, + "item allocation", item_alloc_small_can_repeat); +} +TEST_END + +static void +batch_alloc_huge_cannot_repeat() { + batch_alloc_wrapper(HUGE_BATCH); +} + +static void +item_alloc_huge_cannot_repeat() { + item_alloc_wrapper(HUGE_BATCH); +} + +TEST_BEGIN(test_huge_batch_without_free) { + compare_funcs(0, 1, + "batch allocation", batch_alloc_huge_cannot_repeat, + "item allocation", item_alloc_huge_cannot_repeat); + release_and_clear(batch_ptrs, HUGE_BATCH); + release_and_clear(item_ptrs, HUGE_BATCH); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_small_batch_with_free, + test_huge_batch_without_free); +} -- cgit v0.12 From 81c2f841e5386294834d143fa66c32beb825e4b5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 15:27:08 -0700 Subject: Add a simple utility to detect profiling bias. --- Makefile.in | 3 +- include/jemalloc/internal/prof_externs.h | 7 ++++ src/prof_sys.c | 7 +++- test/analyze/prof_bias.c | 60 ++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/analyze/prof_bias.c diff --git a/Makefile.in b/Makefile.in index 506d9da..7d14758 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,7 +294,8 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ - $(srcroot)test/analyze/sizes.c + $(srcroot)test/analyze/sizes.c \ + $(srcroot)test/analyze/prof_bias.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a4a4aa6..4579ab0 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,6 +43,13 @@ extern size_t lg_prof_sample; extern bool prof_booted; +/* + * A hook to mock out backtrace functionality. This can be handy, since it's + * otherwise difficult to guarantee that two allocations are reported as coming + * from the exact same stack trace in the presence of an optimizing compiler. 
+ */ +extern void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/src/prof_sys.c b/src/prof_sys.c index 4897988..dddba4b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -27,6 +27,8 @@ malloc_mutex_t prof_dump_filename_mtx; +bool prof_do_mock = false; + static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; @@ -267,11 +269,14 @@ prof_backtrace_impl(prof_bt_t *bt) { } #endif + +void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt) = &prof_backtrace_impl; + void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); pre_reentrancy(tsd, NULL); - prof_backtrace_impl(bt); + prof_backtrace_hook(bt); post_reentrancy(tsd); } diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c new file mode 100644 index 0000000..0aae766 --- /dev/null +++ b/test/analyze/prof_bias.c @@ -0,0 +1,60 @@ +#include "test/jemalloc_test.h" + +/* + * This is a helper utility, only meant to be run manually (and, for example, + * doesn't check for failures, try to skip execution in non-prof modes, etc.). + * It runs, allocates objects of two different sizes from the same stack trace, + * and exits. + * + * The idea is that some human operator will run it like: + * MALLOC_CONF="prof:true,prof_final:true" test/analyze/prof_bias + * and manually inspect the results. + * + * The results should be: + * jeprof --text test/analyze/prof_bias --inuse_space jeprof..0.f.heap: + * around 1024 MB + * jeprof --text test/analyze/prof_bias --inuse_objects jeprof..0.f.heap: + * around 33554448 = 16 + 32 * 1024 * 1024 + * + * And, if prof_accum is on: + * jeprof --text test/analyze/prof_bias --alloc_space jeprof..0.f.heap: + * around 2048 MB + * jeprof --text test/analyze/prof_bias --alloc_objects jeprof..0.f.heap: + * around 67108896 = 2 * (16 + 32 * 1024 * 1024) + */ + +static void +mock_backtrace(prof_bt_t *bt) { + bt->len = 4; + bt->vec[0] = (void *)0x111; + bt->vec[1] = (void *)0x222; + bt->vec[2] = (void *)0x333; + bt->vec[3] = (void *)0x444; +} + +static void +do_allocs(size_t sz, size_t cnt, bool do_frees) { + for (size_t i = 0; i < cnt; i++) { + void *ptr = mallocx(sz, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx failure"); + if (do_frees) { + dallocx(ptr, 0); + } + } +} + +int +main(void) { + size_t lg_prof_sample = 19; + int err = mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, + sizeof(lg_prof_sample)); + assert(err == 0); + + prof_backtrace_hook = &mock_backtrace; + do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); + do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); + do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); + do_allocs(32 * 1024* 1024, 16, /* do_frees */ false); + + return 0; +} -- cgit v0.12 From 60993697d8bd3f8a07756091df397ed4044da921 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 13:05:34 -0700 Subject: Prof: Add prof_unbias. This gives more accurate attribution of bytes and counts to stack traces, without introducing backwards incompatibilities in heap-profile parsing tools. We track the ideal reported (to the end user) number of bytes more carefully inside core jemalloc. When dumping heap profiles, insteading of outputting our counts directly, we output counts that will cause parsing tools to give a result close to the value we want. 
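As a rough standalone illustration of the correction being described (a sketch, not part of the patch; the rate and size below are arbitrary): under this sampling scheme, an allocation of size sz is sampled with probability 1 - exp(-sz/R), where R = 2^lg_prof_sample, so each sampled object stands in for roughly 1/(1 - exp(-sz/R)) real objects and sz/(1 - exp(-sz/R)) real bytes. The patch keeps scaled integer forms of these quantities internally (see prof_unbias_map_init() in the diff below) so that no floating-point work happens on allocation paths.

#include <math.h>
#include <stdio.h>

/* Sketch only: what one sampled allocation of size sz represents. */
int
main(void) {
	double R = (double)(1UL << 19);	/* mean sampling interval, 2^lg_prof_sample */
	double sz = 4096.0;		/* size of a sampled allocation */
	double p = 1.0 - exp(-sz / R);	/* probability of being sampled */

	printf("objects per sample: %.1f\n", 1.0 / p);
	printf("bytes per sample:   %.1f\n", sz / p);
	return 0;
}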
We retain the old version as an opt setting, to let users who are tracking values on a per-component basis to keep their metrics stable until they decide to switch. --- include/jemalloc/internal/prof_externs.h | 4 + include/jemalloc/internal/prof_structs.h | 4 + src/jemalloc.c | 10 ++ src/prof.c | 65 +++++++++++++ src/prof_data.c | 160 ++++++++++++++++++++++++++++++- 5 files changed, 241 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 4579ab0..ba5933a 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -19,6 +19,7 @@ extern char opt_prof_prefix[ PATH_MAX + #endif 1]; +extern bool opt_prof_unbias; /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; @@ -40,6 +41,9 @@ extern uint64_t prof_interval; * resets. */ extern size_t lg_prof_sample; +extern size_t prof_unbiased_sz[SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +void prof_unbias_map_init(); extern bool prof_booted; diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 26942aa..fbad614 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -24,9 +24,13 @@ typedef struct { struct prof_cnt_s { /* Profiling counters. */ uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; uint64_t curbytes; + uint64_t curbytes_unbiased; uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; uint64_t accumbytes; + uint64_t accumbytes_unbiased; }; typedef enum { diff --git a/src/jemalloc.c b/src/jemalloc.c index f2e5f8e..ae9ef3d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1517,6 +1517,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + /* + * Undocumented. When set to false, don't + * correct for an unbiasing bug in jeprof + * attribution. This can be handy if you want + * to get consistent numbers from your binary + * across different jemalloc versions, even if + * those numbers are incorrect. The default is + * true. + */ + CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/prof.c b/src/prof.c index 2573541..7b649e4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -32,6 +32,7 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; +bool opt_prof_unbias = true; /* Accessed via prof_sample_event_handler(). */ static counter_accum_t prof_idump_accumulated; @@ -60,6 +61,8 @@ static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; +size_t prof_unbiased_sz[SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[SC_NSIZES]; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -69,6 +72,40 @@ bool prof_booted = false; /******************************************************************************/ +void prof_unbias_map_init() { + /* See the comment in prof_sample_new_event_wait */ +#ifdef JEMALLOC_PROF + for (szind_t i = 0; i < SC_NSIZES; i++) { + double sz = (double)sz_index2size(i); + double rate = (double)(ZU(1) << lg_prof_sample); + double div_val = 1.0 - exp(-sz / rate); + double unbiased_sz = sz / div_val; + /* + * The "true" right value for the unbiased count is + * 1.0/(1 - exp(-sz/rate)). 
The problem is, we keep the counts + * as integers (for a variety of reasons -- rounding errors + * could trigger asserts, and not all libcs can properly handle + * floating point arithmetic during malloc calls inside libc). + * Rounding to an integer, though, can lead to rounding errors + * of over 30% for sizes close to the sampling rate. So + * instead, we multiply by a constant, dividing the maximum + * possible roundoff error by that constant. To avoid overflow + * in summing up size_t values, the largest safe constant we can + * pick is the size of the smallest allocation. + */ + double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); + double shifted_unbiased_cnt = cnt_shift / div_val; + prof_unbiased_sz[i] = (size_t)round(unbiased_sz); + prof_shifted_unbiased_cnt[i] = (size_t)round( + shifted_unbiased_cnt); + } +#else + unreachable(); +#endif +} + +/******************************************************************************/ + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); @@ -96,12 +133,30 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, ptr); prof_info_set(tsd, edata, tctx); + szind_t szind = sz_size2index(size); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + /* + * We need to do these map lookups while holding the lock, to avoid the + * possibility of races with prof_reset calls, which update the map and + * then acquire the lock. This actually still leaves a data race on the + * contents of the unbias map, but we have not yet gone through and + * atomic-ified the prof module, and compilers are not yet causing us + * issues. The key thing is to make sure that, if we read garbage data, + * the prof_reset call is about to mark our tctx as expired before any + * dumping of our corrupted output is attempted. + */ + size_t shifted_unbiased_cnt = prof_shifted_unbiased_cnt[szind]; + size_t unbiased_bytes = prof_unbiased_sz[szind]; tctx->cnts.curobjs++; + tctx->cnts.curobjs_shifted_unbiased += shifted_unbiased_cnt; tctx->cnts.curbytes += usize; + tctx->cnts.curbytes_unbiased += unbiased_bytes; if (opt_prof_accum) { tctx->cnts.accumobjs++; + tctx->cnts.accumobjs_shifted_unbiased += shifted_unbiased_cnt; tctx->cnts.accumbytes += usize; + tctx->cnts.accumbytes_unbiased += unbiased_bytes; } bool record_recent = prof_recent_alloc_prepare(tsd, tctx); tctx->prepared = false; @@ -118,12 +173,21 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); + szind_t szind = sz_size2index(usize); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); + /* + * It's not correct to do equivalent asserts for unbiased bytes, because + * of the potential for races with prof.reset calls. The map contents + * should really be atomic, but we have not atomic-ified the prof module + * yet. 
+ */ tctx->cnts.curobjs--; + tctx->cnts.curobjs_shifted_unbiased -= prof_shifted_unbiased_cnt[szind]; tctx->cnts.curbytes -= usize; + tctx->cnts.curbytes_unbiased -= prof_unbiased_sz[szind]; prof_try_log(tsd, usize, prof_info); @@ -517,6 +581,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { unsigned i; lg_prof_sample = opt_lg_prof_sample; + prof_unbias_map_init(); prof_active = opt_prof_active; if (malloc_mutex_init(&prof_active_mtx, "prof_active", diff --git a/src/prof_data.c b/src/prof_data.c index 6b441de..ae9cd4b 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -514,12 +514,121 @@ prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, prof_dump_write(cbopaque, buf); } +/* + * Casting a double to a uint64_t may not necessarily be in range; this can be + * UB. I don't think this is practically possible with the cur counters, but + * plausibly could be with the accum counters. + */ +#ifdef JEMALLOC_PROF +static uint64_t +prof_double_uint64_cast(double d) { + /* + * Note: UINT64_MAX + 1 is exactly representable as a double on all + * reasonable platforms (certainly those we'll support). Writing this + * as !(a < b) instead of (a >= b) means that we're NaN-safe. + */ + double rounded = round(d); + if (!(rounded < (double)UINT64_MAX)) { + return UINT64_MAX; + } + return (uint64_t)rounded; +} +#endif + +/* + * The unbiasing story is long. The jeprof unbiasing logic was copied from + * pprof. Both shared an issue: they unbiased using the average size of the + * allocations at a particular stack trace. This can work out OK if allocations + * are mostly of the same size given some stack, but not otherwise. We now + * internally track what the unbiased results ought to be. We can't just report + * them as they are though; they'll still go through the jeprof unbiasing + * process. Instead, we figure out what values we can feed *into* jeprof's + * unbiasing mechanism that will lead to getting the right values out. + * + * It'll unbias count and aggregate size as: + * + * c_out = c_in * 1/(1-exp(-s_in/c_in/R) + * s_out = s_in * 1/(1-exp(-s_in/c_in/R) + * + * We want to solve for the values of c_in and s_in that will + * give the c_out and s_out that we've computed internally. + * + * Let's do a change of variables (both to make the math easier and to make it + * easier to write): + * x = s_in / c_in + * y = s_in + * k = 1/R. + * + * Then + * c_out = y/x * 1/(1-exp(-k*x)) + * s_out = y * 1/(1-exp(-k*x)) + * + * The first equation gives: + * y = x * c_out * (1-exp(-k*x)) + * The second gives: + * y = s_out * (1-exp(-k*x)) + * So we have + * x = s_out / c_out. + * And all the other values fall out from that. + * + * This is all a fair bit of work. The thing we get out of it is that we don't + * break backwards compatibility with jeprof (and the various tools that have + * copied its unbiasing logic). Eventually, we anticipate a v3 heap profile + * dump format based on JSON, at which point I think much of this logic can get + * cleaned up (since we'll be taking a compatibility break there anyways). + */ +static void +prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in, + uint64_t *r_s_in) { +#ifdef JEMALLOC_PROF + if (c_out_shifted_i == 0 || s_out_i == 0) { + *r_c_in = 0; + *r_s_in = 0; + return; + } + /* + * See the note in prof_unbias_map_init() to see why we take c_out in a + * shifted form. 
+ */ + double c_out = (double)c_out_shifted_i + / (double)(ZU(1) << SC_LG_TINY_MIN); + double s_out = (double)s_out_i; + double R = (double)(ZU(1) << lg_prof_sample); + + double x = s_out / c_out; + double y = s_out * (1.0 - exp(-x / R)); + + double c_in = y / x; + double s_in = y; + + *r_c_in = prof_double_uint64_cast(c_in); + *r_s_in = prof_double_uint64_cast(s_in); +#else + unreachable(); +#endif +} + static void prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, const prof_cnt_t *cnts) { + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; + if (opt_prof_unbias) { + prof_do_unbias(cnts->curobjs_shifted_unbiased, + cnts->curbytes_unbiased, &curobjs, &curbytes); + prof_do_unbias(cnts->accumobjs_shifted_unbiased, + cnts->accumbytes_unbiased, &accumobjs, &accumbytes); + } else { + curobjs = cnts->curobjs; + curbytes = cnts->curbytes; + accumobjs = cnts->accumobjs; + accumbytes = cnts->accumbytes; + } prof_dump_printf(prof_dump_write, cbopaque, "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", - cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); + curobjs, curbytes, accumobjs, accumbytes); } static void @@ -539,12 +648,20 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curobjs_shifted_unbiased + += tctx->dump_cnts.curobjs_shifted_unbiased; tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + tdata->cnt_summed.curbytes_unbiased + += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumobjs_shifted_unbiased += + tctx->dump_cnts.accumobjs_shifted_unbiased; tdata->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + tdata->cnt_summed.accumbytes_unbiased += + tctx->dump_cnts.accumbytes_unbiased; } break; case prof_tctx_state_dumping: @@ -558,10 +675,17 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curobjs_shifted_unbiased + += tctx->dump_cnts.curobjs_shifted_unbiased; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumobjs_shifted_unbiased + += tctx->dump_cnts.accumobjs_shifted_unbiased; gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + gctx->cnt_summed.accumbytes_unbiased + += tctx->dump_cnts.accumbytes_unbiased; } } @@ -757,11 +881,19 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all->curobjs_shifted_unbiased + += tdata->cnt_summed.curobjs_shifted_unbiased; arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all->curbytes_unbiased + += tdata->cnt_summed.curbytes_unbiased; if (opt_prof_accum) { arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all->accumobjs_shifted_unbiased + += tdata->cnt_summed.accumobjs_shifted_unbiased; arg->cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all->accumbytes_unbiased += + tdata->cnt_summed.accumbytes_unbiased; } } else { tdata->dumping = false; @@ -814,8 +946,16 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, (opt_prof_accum && gctx->cnt_summed.accumobjs == 
0)) { assert(gctx->cnt_summed.curobjs == 0); assert(gctx->cnt_summed.curbytes == 0); + /* + * These asserts would not be correct -- see the comment on races + * in prof.c + * assert(gctx->cnt_summed.curobjs_unbiased == 0); + * assert(gctx->cnt_summed.curbytes_unbiased == 0); + */ assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumobjs_shifted_unbiased == 0); assert(gctx->cnt_summed.accumbytes == 0); + assert(gctx->cnt_summed.accumbytes_unbiased == 0); return; } @@ -834,7 +974,7 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, } /* - * See prof_sample_threshold_update() comment for why the body of this function + * See prof_sample_new_event_wait() comment for why the body of this function * is conditionally compiled. */ static void @@ -1120,6 +1260,7 @@ prof_reset(tsd_t *tsd, size_t lg_sample) { malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; + prof_unbias_map_init(); next = NULL; do { @@ -1162,9 +1303,24 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); + /* + * These asserts are not correct -- see the comment about races in + * prof.c + * + * assert(tctx->cnts.curobjs_shifted_unbiased == 0); + * assert(tctx->cnts.curbytes_unbiased == 0); + */ assert(!opt_prof_accum); assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); + /* + * These ones are, since accumbyte counts never go down. Either + * prof_accum is off (in which case these should never have changed from + * their initial value of zero), or it's on (in which case we shouldn't + * be destroying this tctx). + */ + assert(tctx->cnts.accumobjs_shifted_unbiased == 0); + assert(tctx->cnts.accumbytes_unbiased == 0); prof_gctx_t *gctx = tctx->gctx; -- cgit v0.12 From 53084cc5c285954d576b2f4a19a230a853014f82 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 18:24:05 -0700 Subject: Safety check: Don't directly abort. The sized dealloc checks called the generic safety_check_fail, and then called abort. This means the failure case isn't mockable, hence not testable. Fix it in anticipation of a coming diff. --- src/safety_check.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index a83dca7..c692835 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -4,7 +4,6 @@ static void (*safety_check_abort)(const char *message); void safety_check_fail_sized_dealloc(bool current_dealloc) { - assert(config_opt_safety_checks); char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; @@ -12,7 +11,6 @@ void safety_check_fail_sized_dealloc(bool current_dealloc) { " application sized deallocation bugs (source: %s). Suggest building" "with --enable-debug or address sanitizer for debugging. Abort.\n", src); - abort(); } void safety_check_set_abort(void (*abort_fn)(const char *)) { -- cgit v0.12 From eaed1e39be8574b1a59d21824b68e31af378cd0f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 18:23:36 -0700 Subject: Add sized-delete size-checking functionality. The existing checks are good at finding such issues (on tcache flush), but not so good at pinpointing them. Debug mode can find them, but sometimes debug mode slows down a program so much that hard-to-hit bugs can take a long time to crash. This commit adds functionality to keep programs mostly on their fast paths, while also checking every sized delete argument they get. 
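To make the failure mode concrete, a minimal sketch of the bug class these checks catch (deliberately incorrect application code, not part of the patch; it assumes an unprefixed jemalloc build with its installed <jemalloc/jemalloc.h> header, and the sizes are chosen only so that they map to different size classes): a sized deallocation whose size argument does not match the original allocation. With the checks enabled (--enable-opt-size-checks, or a debug build), the mismatch goes through the safety-check path, which reports it and aborts by default, instead of quietly corrupting allocator state.

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	void *p = malloc(100);	/* rounds up to a small size class (typically 112 bytes) */
	if (p == NULL) {
		return 1;
	}
	/* Bug: the size passed here maps to a different size class (typically 64 bytes). */
	sdallocx(p, 50, 0);
	return 0;
}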
--- Makefile.in | 1 + configure.ac | 17 ++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 ++ include/jemalloc/internal/jemalloc_preamble.h.in | 13 +++++ src/jemalloc.c | 48 +++++++++++++---- test/unit/size_check.c | 62 ++++++++++++++++++++++ 6 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 test/unit/size_check.c diff --git a/Makefile.in b/Makefile.in index 7d14758..a63f69f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -247,6 +247,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_check.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/configure.ac b/configure.ac index b197d32..d68d376 100644 --- a/configure.ac +++ b/configure.ac @@ -1492,6 +1492,23 @@ if test "x$enable_opt_safety_checks" = "x1" ; then fi AC_SUBST([enable_opt_safety_checks]) +dnl Look for sized-deallocation bugs while otherwise being in opt mode. +AC_ARG_ENABLE([opt-size-checks], + [AS_HELP_STRING([--enable-opt-size-checks], + [Perform sized-deallocation argument checks, even in opt mode])], +[if test "x$enable_opt_size_checks" = "xno" ; then + enable_opt_size_checks="0" +else + enable_opt_size_checks="1" +fi +], +[enable_opt_size_checks="0"] +) +if test "x$enable_opt_size_checks" = "x1" ; then + AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ]) +fi +AC_SUBST([enable_opt_size_checks]) + JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { __builtin_unreachable(); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0aef0bb..ee052bb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -373,4 +373,7 @@ /* Performs additional safety checks when defined. */ #undef JEMALLOC_OPT_SAFETY_CHECKS +/* Performs additional size checks when defined. */ +#undef JEMALLOC_OPT_SIZE_CHECKS + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 740fcfc..4012eb2 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -180,6 +180,19 @@ static const bool config_opt_safety_checks = #endif ; +/* + * Extra debugging of sized deallocations too onerous to be included in the + * general safety checks. + */ +static const bool config_opt_size_checks = +#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_OPT_SAFETY_CHECKS) \ + || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. 
*/ #define JEMALLOC_PERCPU_ARENA diff --git a/src/jemalloc.c b/src/jemalloc.c index ae9ef3d..51a1a23 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2793,6 +2793,27 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { thread_dalloc_event(tsd, usize); } +JEMALLOC_ALWAYS_INLINE bool +maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* curent_dealloc */ true); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; +} + JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -2823,13 +2844,6 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { /* Non page aligned must be slab allocated. */ alloc_ctx.slab = true; } - if (config_debug) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr, &dbg_ctx); - assert(dbg_ctx.szind == alloc_ctx.szind); - assert(dbg_ctx.slab == alloc_ctx.slab); - } } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); @@ -2845,6 +2859,16 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* + * This is a heap corruption bug. In real life we'll crash; for + * the unit test we just want to avoid breaking anything too + * badly to get a test result out. Let's leak instead of trying + * to free. + */ + return; + } if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); @@ -2934,8 +2958,15 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); - alloc_ctx.slab = false; + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. 
*/ + return true; } + uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); @@ -3739,7 +3770,6 @@ sdallocx_default(void *ptr, size_t size, int flags) { tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); check_entry_exit_locking(tsd_tsdn(tsd)); unsigned tcache_ind = mallocx_tcache_get(flags); diff --git a/test/unit/size_check.c b/test/unit/size_check.c new file mode 100644 index 0000000..3d2912d --- /dev/null +++ b/test/unit/size_check.c @@ -0,0 +1,62 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +#define SIZE1 SC_SMALL_MAXCLASS +#define SIZE2 (SC_SMALL_MAXCLASS / 2) + +TEST_BEGIN(test_invalid_size_sdallocx) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + sdallocx(ptr, SIZE2, 0); + expect_true(fake_abort_called, "Safety check didn't fire"); + + safety_check_set_abort(NULL); +} +TEST_END + +TEST_BEGIN(test_invalid_size_sdallocx_nonzero_flag) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + sdallocx(ptr, SIZE2, MALLOCX_TCACHE_NONE); + expect_true(fake_abort_called, "Safety check didn't fire"); + + safety_check_set_abort(NULL); +} +TEST_END + +TEST_BEGIN(test_invalid_size_sdallocx_noflags) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + je_sdallocx_noflags(ptr, SIZE2); + expect_true(fake_abort_called, "Safety check didn't fire"); + + safety_check_set_abort(NULL); +} +TEST_END + +int +main(void) { + return test( + test_invalid_size_sdallocx, + test_invalid_size_sdallocx_nonzero_flag, + test_invalid_size_sdallocx_noflags); +} -- cgit v0.12 From 743021b63fd06ad23a81af310d467e2e26108a9a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 5 Aug 2020 14:43:03 -0700 Subject: Fix size miscalculation bug in reallocation --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index b2feff4..f8e8cba 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1297,7 +1297,7 @@ void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - size_t usize = sz_s2u(size); + size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } -- cgit v0.12 From 8f9e958e1e81342091b1178005c0dedfed5573dd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 10 Aug 2020 15:39:16 -0700 Subject: Add alignment stress test for rallocx --- test/integration/rallocx.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 6cc4437..57c7967 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -171,6 +171,39 @@ TEST_BEGIN(test_align) { } TEST_END +TEST_BEGIN(test_align_enum) { +/* Span both small sizes and large sizes. 
*/ +#define LG_MIN 12 +#define LG_MAX 15 + for (size_t lg_align = LG_MIN; lg_align <= LG_MAX; ++lg_align) { + for (size_t lg_size = LG_MIN; lg_size <= LG_MAX; ++lg_size) { + size_t size = 1 << lg_size; + for (size_t lg_align_next = LG_MIN; + lg_align_next <= LG_MAX; ++lg_align_next) { + int flags = MALLOCX_LG_ALIGN(lg_align); + void *p = mallocx(1, flags); + assert_ptr_not_null(p, + "Unexpected mallocx() error"); + assert_zu_eq(nallocx(1, flags), + malloc_usable_size(p), + "Wrong mallocx() usable size"); + int flags_next = + MALLOCX_LG_ALIGN(lg_align_next); + p = rallocx(p, size, flags_next); + assert_ptr_not_null(p, + "Unexpected rallocx() error"); + expect_zu_eq(nallocx(size, flags_next), + malloc_usable_size(p), + "Wrong rallocx() usable size"); + free(p); + } + } + } +#undef LG_MAX +#undef LG_MIN +} +TEST_END + TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; unsigned lg_align; @@ -253,6 +286,7 @@ main(void) { test_grow_and_shrink, test_zero, test_align, + test_align_enum, test_lg_align_and_zero, test_overflow); } -- cgit v0.12 From 9e18ae639f760d9c655e79baa2880e26b32c54db Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 12 Aug 2020 11:00:50 -0700 Subject: Config: safety checks don't imply size checks. The commit introducing size checks accidentally enabled them whenever any safety checks were on. This ends up causing the regression that splitting up the features was intended to avoid. Fix the issue. --- include/jemalloc/internal/jemalloc_preamble.h.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 4012eb2..d62fee0 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -185,8 +185,7 @@ static const bool config_opt_safety_checks = * general safety checks. */ static const bool config_opt_size_checks = -#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_OPT_SAFETY_CHECKS) \ - || defined(JEMALLOC_DEBUG) +#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_DEBUG) true #else false -- cgit v0.12 From ab274a23b98c228c073f1dfef89d0323fbe8b4c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 12 Aug 2020 12:07:42 -0700 Subject: Add narenas_ratio. This allows setting arenas per cpu dynamically, rather than forcing the user to know the number of CPUs in advance if they want a particular CPU/space tradeoff. --- src/jemalloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 51a1a23..0ca400e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -127,6 +127,7 @@ bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; +unsigned opt_narenas_ratio = 4; unsigned ncpus; @@ -1294,6 +1295,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ false) } } + if (CONF_MATCH("narenas_ratio")) { + CONF_HANDLE_UNSIGNED(opt_narenas_ratio, + "narenas_ratio", 1, UINT_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; @@ -1781,7 +1788,7 @@ malloc_narenas_default(void) { * default. 
*/ if (ncpus > 1) { - return ncpus << 2; + return ncpus * opt_narenas_ratio; } else { return 1; } -- cgit v0.12 From 38867c5c1723efa7e42898e1737e1587b5c734e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 16:27:50 -0700 Subject: Makefile: alphabetize stress/analyze utilities. --- Makefile.in | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index a63f69f..7140c25 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,14 +294,15 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif -TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ - $(srcroot)test/analyze/sizes.c \ - $(srcroot)test/analyze/prof_bias.c -TESTS_STRESS := $(srcroot)test/stress/microbench.c \ +TESTS_ANALYZE := $(srcroot)test/analyze/prof_bias.c \ + $(srcroot)test/analyze/rand.c \ + $(srcroot)test/analyze/sizes.c +TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/fill_flush.c \ - $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c \ - $(srcroot)test/stress/batch_alloc.c + $(srcroot)test/stress/large_microbench.c \ + $(srcroot)test/stress/microbench.c + TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ -- cgit v0.12 From 32d46732217ab592032567350c176850ba0249c6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 16:57:09 -0700 Subject: Add a mallctl speed stress test. --- Makefile.in | 1 + test/stress/mallctl.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 test/stress/mallctl.c diff --git a/Makefile.in b/Makefile.in index 7140c25..80e5aaf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -301,6 +301,7 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/hookbench.c \ $(srcroot)test/stress/large_microbench.c \ + $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c new file mode 100644 index 0000000..6d2e5c5 --- /dev/null +++ b/test/stress/mallctl.c @@ -0,0 +1,74 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +static void +mallctl_short(void) { + const char *version; + size_t sz = sizeof(version); + int err = mallctl("version", &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctl failure"); +} + +size_t mib_short[1]; + +static void +mallctlbymib_short(void) { + size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + const char *version; + size_t sz = sizeof(version); + int err = mallctlbymib(mib_short, miblen, &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctlbymib failure"); +} + +TEST_BEGIN(test_mallctl_vs_mallctlbymib_short) { + size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + + int err = mallctlnametomib("version", mib_short, &miblen); + assert_d_eq(err, 0, "mallctlnametomib failure"); + compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_short", + mallctl_short, "mallctlbymib_short", mallctlbymib_short); +} +TEST_END + +static void +mallctl_long(void) { + uint64_t nmalloc; + size_t sz = sizeof(nmalloc); + int err = mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, + 0); + assert_d_eq(err, 0, "mallctl failure"); +} + +size_t mib_long[6]; + +static void +mallctlbymib_long(void) { + size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + const char *version; + size_t sz = sizeof(version); + int err = mallctlbymib(mib_long, miblen, &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctlbymib failure"); +} + 
+TEST_BEGIN(test_mallctl_vs_mallctlbymib_long) { + /* + * We want to use the longest mallctl we have; that needs stats support + * to be allowed. + */ + test_skip_if(!config_stats); + + size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + int err = mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib_long, + &miblen); + assert_d_eq(err, 0, "mallctlnametomib failure"); + compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_long", + mallctl_long, "mallctlbymib_long", mallctlbymib_long); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_mallctl_vs_mallctlbymib_short, + test_mallctl_vs_mallctlbymib_long); +} -- cgit v0.12 From 7b187360e9641c8f664709d3ac50296e3a87b2e0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 18:30:34 -0700 Subject: IO: Support 0-padding for unsigned numbers. --- src/malloc_io.c | 28 ++++++++++++++++++++++++++-- test/unit/malloc_io.c | 10 ++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/malloc_io.c b/src/malloc_io.c index f5004f0..59a0cbf 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -346,7 +346,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (!left_justify && pad_len != 0) { \ size_t j; \ for (j = 0; j < pad_len; j++) { \ - APPEND_C(' '); \ + if (pad_zero) { \ + APPEND_C('0'); \ + } else { \ + APPEND_C(' '); \ + } \ } \ } \ /* Value. */ \ @@ -418,6 +422,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { unsigned char len = '?'; char *s; size_t slen; + bool first_width_digit = true; + bool pad_zero = false; f++; /* Flags. */ @@ -454,7 +460,12 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { width = -width; } break; - case '0': case '1': case '2': case '3': case '4': + case '0': + if (first_width_digit) { + pad_zero = true; + } + JEMALLOC_FALLTHROUGH; + case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; set_errno(0); @@ -462,6 +473,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; + first_width_digit = false; break; } default: break; @@ -519,6 +531,18 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[D2S_BUFSIZE]; + /* + * Outputting negative, zero-padded numbers + * would require a nontrivial rework of the + * interaction between the width and padding + * (since 0 padding goes between the '-' and the + * number, while ' ' padding goes either before + * the - or after the number. Since we + * currently don't ever need 0-padded negative + * numbers, just don't bother supporting it. + */ + assert(!pad_zero); + GET_ARG_NUMERIC(val, len); s = d2s(val, (plus_plus ? '+' : (plus_space ? ' ' : '-')), buf, &slen); diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 1a6e5f6..385f745 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -175,6 +175,7 @@ TEST_BEGIN(test_malloc_snprintf) { TEST("_1234_", "_%o_", 01234); TEST("_01234_", "_%#o_", 01234); TEST("_1234_", "_%u_", 1234); + TEST("01234", "%05u", 1234); TEST("_1234_", "_%d_", 1234); TEST("_ 1234_", "_% d_", 1234); @@ -183,6 +184,15 @@ TEST_BEGIN(test_malloc_snprintf) { TEST("_-1234_", "_% d_", -1234); TEST("_-1234_", "_%+d_", -1234); + /* + * Morally, we should test these too, but 0-padded signed types are not + * yet supported. 
+ * + * TEST("01234", "%05", 1234); + * TEST("-1234", "%05d", -1234); + * TEST("-01234", "%06d", -1234); + */ + TEST("_-1234_", "_%d_", -1234); TEST("_1234_", "_%d_", 1234); TEST("_-1234_", "_%i_", -1234); -- cgit v0.12 From 753bbf1849caaf4f523567b2da6cb1de6147d811 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 17:39:45 -0700 Subject: Benchmarks: Also print ns / iter. This is often what we really care about. It's not easy to do the division mentally in all cases. --- test/include/test/bench.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 6cd19fd..0397c94 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -13,6 +13,20 @@ time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, timer_stop(timer); } +#define FMT_NSECS_BUF_SIZE 100 +/* Print nanoseconds / iter into the buffer "buf". */ +static inline void +fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { + uint64_t nsec = usec * 1000; + /* We'll display 3 digits after the decimal point. */ + uint64_t nsec1000 = nsec * 1000; + uint64_t nsecs_per_iter1000 = nsec1000 / iters; + uint64_t intpart = nsecs_per_iter1000 / 1000; + uint64_t fracpart = nsecs_per_iter1000 % 1000; + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%"FMTu64".%03"FMTu64, intpart, + fracpart); +} + static inline void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, void (*func_a), const char *name_b, void (*func_b)) { @@ -29,11 +43,18 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, time_func(&timer_a, nwarmup, niter, func_a); time_func(&timer_b, nwarmup, niter, func_b); + uint64_t usec_a = timer_usec(&timer_a); + char buf_a[FMT_NSECS_BUF_SIZE]; + fmt_nsecs(usec_a, niter, buf_a); + + uint64_t usec_b = timer_usec(&timer_b); + char buf_b[FMT_NSECS_BUF_SIZE]; + fmt_nsecs(usec_b, niter, buf_b); + timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " - "%s=%"FMTu64"us, ratio=1:%s\n", - niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), - ratio_buf); + malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " + "%s=%"FMTu64"us (%s ns/iter), ratio=1:%s\n", + niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); } -- cgit v0.12 From b0ffa39cac2af955b8b39e5457e9ca8ed3e8748b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 17 Aug 2020 09:04:33 -0700 Subject: Mallctl stress test: fix a type. The mallctlbymib_long helper was copy-pasted from mallctlbymib_short, and incorrectly used its output variable (a char *) rather than the output variable of the mallctl call it was using (a uint64_t), causing breakages when sizeof(char *) differed from sizeof(uint64_t). 
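For reference, a minimal standalone sketch of the corrected read pattern (illustrative only, not part of this patch; it assumes a stats-enabled build, so that the "stats.arenas.0.bins.0.nmalloc" mallctl exists):

    #include <stddef.h>
    #include <stdint.h>
    #include <jemalloc/jemalloc.h>

    /* Read a uint64_t statistic via the MIB interface. The output variable
     * must have the mallctl's own type (uint64_t here), not a pointer type. */
    static uint64_t
    read_nmalloc(void) {
        size_t mib[6];
        size_t miblen = sizeof(mib) / sizeof(mib[0]);
        if (mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib, &miblen) != 0) {
            return 0;
        }
        uint64_t nmalloc;
        size_t sz = sizeof(nmalloc);
        if (mallctlbymib(mib, miblen, &nmalloc, &sz, NULL, 0) != 0) {
            return 0;
        }
        return nmalloc;
    }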
--- test/stress/mallctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c index 6d2e5c5..d29b311 100644 --- a/test/stress/mallctl.c +++ b/test/stress/mallctl.c @@ -44,9 +44,9 @@ size_t mib_long[6]; static void mallctlbymib_long(void) { size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); - const char *version; - size_t sz = sizeof(version); - int err = mallctlbymib(mib_long, miblen, &version, &sz, NULL, 0); + uint64_t nmalloc; + size_t sz = sizeof(nmalloc); + int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } -- cgit v0.12 From b399463fba68d7098d52123b513ab51a2e1ace49 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 13 Aug 2020 13:09:05 -0700 Subject: flat_bitmap unit test: Silence a warning. --- test/unit/flat_bitmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 14ac6ba..410e94f 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -461,8 +461,12 @@ expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, } bool simple_iter_res; - size_t simple_iter_begin; - size_t simple_iter_len; + /* + * These are dead stores, but the compiler can't always figure that out + * statically, and warns on the uninitialized variable. + */ + size_t simple_iter_begin = 0; + size_t simple_iter_len = 0; simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, &simple_iter_len, val, forward); -- cgit v0.12 From 131b1b53383720de3ca8877c676e85d968205103 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:03:40 -0700 Subject: Rename ecache_grow -> geom_grow. We're about to start using it outside of the ecaches, in the HPA central allocator. --- Makefile.in | 1 + include/jemalloc/internal/ecache.h | 25 ------------------------- include/jemalloc/internal/geom_grow.h | 29 +++++++++++++++++++++++++++++ include/jemalloc/internal/pac.h | 4 +++- src/ecache.c | 26 -------------------------- src/extent.c | 26 +++++++++++++------------- src/geom_grow.c | 29 +++++++++++++++++++++++++++++ src/pa_extra.c | 6 +++--- src/pac.c | 10 +++++----- test/unit/retained.c | 2 +- 10 files changed, 84 insertions(+), 74 deletions(-) create mode 100644 include/jemalloc/internal/geom_grow.h create mode 100644 src/geom_grow.c diff --git a/Makefile.in b/Makefile.in index 80e5aaf..3697e07 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ + $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index a11418c..cc2752f 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -19,26 +19,6 @@ struct ecache_s { bool delay_coalesce; }; -typedef struct ecache_grow_s ecache_grow_t; -struct ecache_grow_s { - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. 
- * - * Synchronization: extent_grow_mtx - */ - pszind_t next; - pszind_t limit; - malloc_mutex_t mtx; -}; - static inline size_t ecache_npages_get(ecache_t *ecache) { return eset_npages_get(&ecache->eset); @@ -65,9 +45,4 @@ void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); -bool ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow); - #endif /* JEMALLOC_INTERNAL_ECACHE_H */ diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h new file mode 100644 index 0000000..a28c17c --- /dev/null +++ b/include/jemalloc/internal/geom_grow.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_ECACHE_GROW_H +#define JEMALLOC_INTERNAL_ECACHE_GROW_H + +typedef struct geom_grow_s geom_grow_t; +struct geom_grow_s { + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * + * Synchronization: mtx + */ + pszind_t next; + pszind_t limit; + malloc_mutex_t mtx; +}; + +bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); + +#endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 2d02bda..a028456 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,8 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/geom_grow.h" #include "jemalloc/internal/pai.h" + /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. @@ -93,7 +95,7 @@ struct pac_s { edata_cache_t *edata_cache; /* The grow info for the retained ecache. 
*/ - ecache_grow_t ecache_grow; + geom_grow_t geom_grow; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/ecache.c b/src/ecache.c index 301b7ca..3c1a227 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -29,29 +29,3 @@ void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache) { malloc_mutex_postfork_child(tsdn, &ecache->mtx); } - -bool -ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - ecache_grow->next = sz_psz2ind(HUGEPAGE); - ecache_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&ecache_grow->mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { - return true; - } - return false; -} - -void -ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - malloc_mutex_prefork(tsdn, &ecache_grow->mtx); -} - -void -ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - malloc_mutex_postfork_parent(tsdn, &ecache_grow->mtx); -} - -void -ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - malloc_mutex_postfork_child(tsdn, &ecache_grow->mtx); -} diff --git a/src/extent.c b/src/extent.c index 98db40e..644623d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -614,7 +614,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->geom_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -626,15 +626,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (pac->ecache_grow.next + egn_skip >= + if (pac->geom_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); @@ -727,13 +727,13 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (pac->ecache_grow.next + egn_skip + 1 <= pac->ecache_grow.limit) { - pac->ecache_grow.next += egn_skip + 1; + if (pac->geom_grow.next + egn_skip + 1 <= pac->geom_grow.limit) { + pac->geom_grow.next += egn_skip + 1; } else { - pac->ecache_grow.next = pac->ecache_grow.limit; + pac->geom_grow.next = pac->geom_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ @@ -747,7 +747,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); return NULL; } @@ -757,13 +757,13 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -772,9 +772,9 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->geom_grow.mtx); return edata; } diff --git a/src/geom_grow.c b/src/geom_grow.c new file mode 100644 index 0000000..d188bb8 --- /dev/null +++ b/src/geom_grow.c @@ -0,0 +1,29 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow) { + geom_grow->next = sz_psz2ind(HUGEPAGE); + geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); + if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + return true; + } + return false; +} + +void +geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_prefork(tsdn, &geom_grow->mtx); +} + +void +geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_postfork_parent(tsdn, &geom_grow->mtx); +} + +void +geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_postfork_child(tsdn, &geom_grow->mtx); +} + diff --git a/src/pa_extra.c b/src/pa_extra.c index 26a196b..9e083ca 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_grow_prefork(tsdn, &shard->pac.ecache_grow); + geom_grow_prefork(tsdn, &shard->pac.geom_grow); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); + geom_grow_postfork_parent(tsdn, &shard->pac.geom_grow); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); + geom_grow_postfork_child(tsdn, &shard->pac.geom_grow); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index 
a437088..151be20 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (ecache_grow_init(tsdn, &pac->ecache_grow)) { + if (geom_grow_init(tsdn, &pac->geom_grow)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { @@ -203,14 +203,14 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, } } - malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(pac->ecache_grow.limit); + *old_limit = sz_pind2sz(pac->geom_grow.limit); } if (new_limit != NULL) { - pac->ecache_grow.limit = new_ind; + pac->geom_grow.limit = new_ind; } - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); return false; } diff --git a/test/unit/retained.c b/test/unit/retained.c index ef301aa..8139617 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.ecache_grow.next; pind++) { + arena->pa_shard.pac.geom_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; -- cgit v0.12 From ffe552223cc3b50dd88458e46d531f970b45096e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:16:31 -0700 Subject: Geom_grow: Move in advancing logic. --- include/jemalloc/internal/geom_grow.h | 27 +++++++++++++++++++++++++++ src/extent.c | 22 +++++++--------------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index a28c17c..d3ac6c9 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -21,6 +21,33 @@ struct geom_grow_s { malloc_mutex_t mtx; }; +static inline bool +geom_grow_size_prepare(geom_grow_t *geom_grow, size_t alloc_size_min, + size_t *r_alloc_size, pszind_t *r_skip) { + *r_skip = 0; + *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + while (*r_alloc_size < alloc_size_min) { + (*r_skip)++; + if (geom_grow->next + *r_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. */ + return true; + } + *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + } + return false; +} + +static inline void +geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { + if (geom_grow->next + skip + 1 <= geom_grow->limit) { + geom_grow->next += skip + 1; + } else { + geom_grow->next = geom_grow->limit; + } + +} + bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); diff --git a/src/extent.c b/src/extent.c index 644623d..6abaadf 100644 --- a/src/extent.c +++ b/src/extent.c @@ -625,16 +625,12 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Find the next extent size in the series that would be large enough to * satisfy this request. */ - pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); - while (alloc_size < alloc_size_min) { - egn_skip++; - if (pac->geom_grow.next + egn_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. 
*/ - goto label_err; - } - alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); + size_t alloc_size; + pszind_t geom_grow_skip; + bool err = geom_grow_size_prepare(&pac->geom_grow, alloc_size_min, + &alloc_size, &geom_grow_skip); + if (err) { + goto label_err; } edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); @@ -727,12 +723,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (pac->geom_grow.next + egn_skip + 1 <= pac->geom_grow.limit) { - pac->geom_grow.next += egn_skip + 1; - } else { - pac->geom_grow.next = pac->geom_grow.limit; - } /* All opportunities for failure are past. */ + geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { -- cgit v0.12 From c57494879fe12157470cefc44bbd121726ec363a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:26:52 -0700 Subject: Geom_grow: Don't take tsdn at init. It's never used. --- include/jemalloc/internal/geom_grow.h | 2 +- src/geom_grow.c | 2 +- src/pac.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index d3ac6c9..128c56f 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -48,7 +48,7 @@ geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { } -bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); +bool geom_grow_init(geom_grow_t *geom_grow); void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); diff --git a/src/geom_grow.c b/src/geom_grow.c index d188bb8..eab8bc9 100644 --- a/src/geom_grow.c +++ b/src/geom_grow.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow) { +geom_grow_init(geom_grow_t *geom_grow) { geom_grow->next = sz_psz2ind(HUGEPAGE); geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", diff --git a/src/pac.c b/src/pac.c index 151be20..6d52a93 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (geom_grow_init(tsdn, &pac->geom_grow)) { + if (geom_grow_init(&pac->geom_grow)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { -- cgit v0.12 From 5e90fd006e97d62d74c79ce67cbf0cae5429ecdc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Aug 2020 10:18:31 -0700 Subject: Geom_grow: Don't keep the mutex internal. We're about to use it in ways that will have external synchronization. --- include/jemalloc/internal/geom_grow.h | 8 +------- include/jemalloc/internal/pac.h | 1 + src/extent.c | 16 ++++++++-------- src/geom_grow.c | 23 +---------------------- src/pa_extra.c | 6 +++--- src/pac.c | 8 +++++--- 6 files changed, 19 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index 128c56f..ba83386 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -13,12 +13,9 @@ struct geom_grow_s { * retain_grow_limit is the max allowed size ind to expand (unless the * required size is greater). 
Default is no limit, and controlled * through mallctl only. - * - * Synchronization: mtx */ pszind_t next; pszind_t limit; - malloc_mutex_t mtx; }; static inline bool @@ -48,9 +45,6 @@ geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { } -bool geom_grow_init(geom_grow_t *geom_grow); -void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); -void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); -void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_init(geom_grow_t *geom_grow); #endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index a028456..614d34a 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -96,6 +96,7 @@ struct pac_s { /* The grow info for the retained ecache. */ geom_grow_t geom_grow; + malloc_mutex_t grow_mtx; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/extent.c b/src/extent.c index 6abaadf..26a5c13 100644 --- a/src/extent.c +++ b/src/extent.c @@ -614,7 +614,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &pac->geom_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->grow_mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -725,7 +725,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ /* All opportunities for failure are past. */ geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -739,7 +739,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); return NULL; } @@ -749,24 +749,24 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_lock(tsdn, &pac->grow_mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); - /* extent_grow_retained() always releases extent_grow_mtx. */ + /* extent_grow_retained() always releases pac->grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); } - malloc_mutex_assert_not_owner(tsdn, &pac->geom_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->grow_mtx); return edata; } diff --git a/src/geom_grow.c b/src/geom_grow.c index eab8bc9..4816bb7 100644 --- a/src/geom_grow.c +++ b/src/geom_grow.c @@ -1,29 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -bool +void geom_grow_init(geom_grow_t *geom_grow) { geom_grow->next = sz_psz2ind(HUGEPAGE); geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { - return true; - } - return false; -} - -void -geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_prefork(tsdn, &geom_grow->mtx); } - -void -geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_postfork_parent(tsdn, &geom_grow->mtx); -} - -void -geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_postfork_child(tsdn, &geom_grow->mtx); -} - diff --git a/src/pa_extra.c b/src/pa_extra.c index 9e083ca..8bf54b9 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - geom_grow_prefork(tsdn, &shard->pac.geom_grow); + malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - geom_grow_postfork_parent(tsdn, &shard->pac.geom_grow); + malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - geom_grow_postfork_child(tsdn, &shard->pac.geom_grow); + malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index 6d52a93..f50e82b 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (geom_grow_init(&pac->geom_grow)) { + geom_grow_init(&pac->geom_grow); + if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { @@ -203,14 +205,14 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, } } - malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_lock(tsdn, &pac->grow_mtx); if (old_limit != NULL) { *old_limit = sz_pind2sz(pac->geom_grow.limit); } if (new_limit != NULL) { pac->geom_grow.limit = new_ind; } - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); return false; } -- cgit v0.12 From 8efcdc3f98d896c0a67cc2dc34ff0494639b6bf5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: 
Fri, 21 Aug 2020 10:23:23 -0700 Subject: Move unbias data to prof_data --- include/jemalloc/internal/prof_data.h | 4 ++++ include/jemalloc/internal/prof_externs.h | 3 --- src/prof.c | 36 -------------------------------- src/prof_data.c | 35 +++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index e2e4aed..d7c3c52 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,6 +10,9 @@ extern malloc_mutex_t prof_dump_mtx; extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; +extern size_t prof_unbiased_sz[SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; + void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); @@ -17,6 +20,7 @@ bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); +void prof_unbias_map_init(); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index ba5933a..b94fbed 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -41,9 +41,6 @@ extern uint64_t prof_interval; * resets. */ extern size_t lg_prof_sample; -extern size_t prof_unbiased_sz[SC_NSIZES]; -extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; -void prof_unbias_map_init(); extern bool prof_booted; diff --git a/src/prof.c b/src/prof.c index 7b649e4..0c12c49 100644 --- a/src/prof.c +++ b/src/prof.c @@ -61,8 +61,6 @@ static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; -size_t prof_unbiased_sz[SC_NSIZES]; -size_t prof_shifted_unbiased_cnt[SC_NSIZES]; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -72,40 +70,6 @@ bool prof_booted = false; /******************************************************************************/ -void prof_unbias_map_init() { - /* See the comment in prof_sample_new_event_wait */ -#ifdef JEMALLOC_PROF - for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); - double rate = (double)(ZU(1) << lg_prof_sample); - double div_val = 1.0 - exp(-sz / rate); - double unbiased_sz = sz / div_val; - /* - * The "true" right value for the unbiased count is - * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts - * as integers (for a variety of reasons -- rounding errors - * could trigger asserts, and not all libcs can properly handle - * floating point arithmetic during malloc calls inside libc). - * Rounding to an integer, though, can lead to rounding errors - * of over 30% for sizes close to the sampling rate. So - * instead, we multiply by a constant, dividing the maximum - * possible roundoff error by that constant. To avoid overflow - * in summing up size_t values, the largest safe constant we can - * pick is the size of the smallest allocation. 
- */ - double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); - double shifted_unbiased_cnt = cnt_shift / div_val; - prof_unbiased_sz[i] = (size_t)round(unbiased_sz); - prof_shifted_unbiased_cnt[i] = (size_t)round( - shifted_unbiased_cnt); - } -#else - unreachable(); -#endif -} - -/******************************************************************************/ - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); diff --git a/src/prof_data.c b/src/prof_data.c index ae9cd4b..8dd1fd0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,6 +59,9 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; +size_t prof_unbiased_sz[SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[SC_NSIZES]; + /******************************************************************************/ /* Red-black trees. */ @@ -535,6 +538,38 @@ prof_double_uint64_cast(double d) { } #endif +void prof_unbias_map_init() { + /* See the comment in prof_sample_new_event_wait */ +#ifdef JEMALLOC_PROF + for (szind_t i = 0; i < SC_NSIZES; i++) { + double sz = (double)sz_index2size(i); + double rate = (double)(ZU(1) << lg_prof_sample); + double div_val = 1.0 - exp(-sz / rate); + double unbiased_sz = sz / div_val; + /* + * The "true" right value for the unbiased count is + * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts + * as integers (for a variety of reasons -- rounding errors + * could trigger asserts, and not all libcs can properly handle + * floating point arithmetic during malloc calls inside libc). + * Rounding to an integer, though, can lead to rounding errors + * of over 30% for sizes close to the sampling rate. So + * instead, we multiply by a constant, dividing the maximum + * possible roundoff error by that constant. To avoid overflow + * in summing up size_t values, the largest safe constant we can + * pick is the size of the smallest allocation. + */ + double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); + double shifted_unbiased_cnt = cnt_shift / div_val; + prof_unbiased_sz[i] = (size_t)round(unbiased_sz); + prof_shifted_unbiased_cnt[i] = (size_t)round( + shifted_unbiased_cnt); + } +#else + unreachable(); +#endif +} + /* * The unbiasing story is long. The jeprof unbiasing logic was copied from * pprof. 
Both shared an issue: they unbiased using the average size of the -- cgit v0.12 From 20f2479ed79a8ef152c9ef50efdee2aec5dc5737 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 15:33:50 -0700 Subject: Do not create size class tables for non-prof builds --- include/jemalloc/internal/prof_data.h | 4 ++-- include/jemalloc/internal/prof_types.h | 8 ++++++++ src/prof.c | 4 ++++ src/prof_data.c | 4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index d7c3c52..4c8e22c 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,8 +10,8 @@ extern malloc_mutex_t prof_dump_mtx; extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; -extern size_t prof_unbiased_sz[SC_NSIZES]; -extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +extern size_t prof_unbiased_sz[PROF_SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index dbd758f..ba62865 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -39,6 +39,14 @@ typedef struct prof_recent_s prof_recent_t; # define PROF_DUMP_BUFSIZE 65536 #endif +/* Size of size class related tables */ +#ifdef JEMALLOC_PROF +# define PROF_SC_NSIZES SC_NSIZES +#else +/* Minimize memory bloat for non-prof builds. */ +# define PROF_SC_NSIZES 1 +#endif + /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 diff --git a/src/prof.c b/src/prof.c index 0c12c49..d50cbe3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -89,6 +89,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { + cassert(config_prof); + if (opt_prof_sys_thread_name) { prof_sys_thread_name_fetch(tsd); } @@ -133,6 +135,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + cassert(config_prof); + assert(prof_info != NULL); prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); diff --git a/src/prof_data.c b/src/prof_data.c index 8dd1fd0..6334985 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,8 +59,8 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -size_t prof_unbiased_sz[SC_NSIZES]; -size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +size_t prof_unbiased_sz[PROF_SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; /******************************************************************************/ /* Red-black trees. 
*/ -- cgit v0.12 From 866231fc6166b9c937ce071c5717844998a51413 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 24 Aug 2020 20:56:34 -0700 Subject: Do not repeat reentrancy test in profiling --- include/jemalloc/internal/prof_inlines.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 3d0bd14..62c5683 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H -#define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H +#define JEMALLOC_INTERNAL_PROF_INLINES_H #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" @@ -115,9 +115,12 @@ prof_sample_should_skip(tsd_t *tsd, bool sample_event) { return true; } - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } + /* + * sample_event is always obtained from the thread event module, and + * whenever it's true, it means that the thread event module has + * already checked the reentrancy level. + */ + assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely(tdata == NULL)) { @@ -255,4 +258,4 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, } } -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ -- cgit v0.12 From 202f01d4f8b28237d9f349f9ee91691ec220425a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 26 Aug 2020 14:52:25 -0700 Subject: Fix szind computation in profiling --- src/prof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index d50cbe3..4f45199 100644 --- a/src/prof.c +++ b/src/prof.c @@ -99,7 +99,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, ptr); prof_info_set(tsd, edata, tctx); - szind_t szind = sz_size2index(size); + szind_t szind = sz_size2index(usize); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); /* -- cgit v0.12 From b549389e4a491f48ea466dce4fda475bcd6b7936 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 11 Aug 2020 15:35:41 -0700 Subject: Correct usize in prof last-N record --- include/jemalloc/internal/prof_recent.h | 2 +- include/jemalloc/internal/prof_structs.h | 1 + src/prof.c | 2 +- src/prof_recent.c | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d793c6d..df41023 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -5,7 +5,7 @@ extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); -void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index fbad614..73ac3d5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -209,6 +209,7 @@ struct prof_recent_s { ql_elm(prof_recent_t) link; size_t size; + size_t usize; atomic_p_t alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *alloc_tctx; prof_tctx_t *dalloc_tctx; diff --git a/src/prof.c b/src/prof.c index 4f45199..9b651db 100644 --- a/src/prof.c +++ b/src/prof.c @@ -129,7 +129,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); if (record_recent) { assert(tctx == edata_prof_tctx_get(edata)); - prof_recent_alloc(tsd, edata, size); + prof_recent_alloc(tsd, edata, size, usize); } } diff --git a/src/prof_recent.c b/src/prof_recent.c index 426f62e..cfaa5a6 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -270,7 +270,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { } void -prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { +prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -356,6 +356,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link); assert(tail != NULL); tail->size = size; + tail->usize = usize; nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); tail->alloc_tctx = tctx; nstime_init_zero(&tail->dalloc_time); @@ -477,8 +478,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_object_begin(emitter); emitter_json_kv(emitter, "size", emitter_type_size, &node->size); - size_t usize = sz_s2u(node->size); - emitter_json_kv(emitter, "usize", emitter_type_size, &usize); + emitter_json_kv(emitter, "usize", emitter_type_size, &node->usize); bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL; emitter_json_kv(emitter, "released", emitter_type_bool, &released); -- cgit v0.12 From 09eda2c9b621ced9982514f2e69e4e572e06ca2d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 9 Sep 2020 11:07:00 -0700 Subject: Add unit tests for usize in prof recent records --- test/unit/prof_recent.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 1885a1a..180f13f 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -363,6 +363,7 @@ call_dump() { typedef struct { size_t size; + size_t usize; bool released; } confirm_record_t; @@ -421,7 +422,7 @@ confirm_record(const char *template, ASSERT_STR("\"usize\""); ASSERT_CHAR(':'); - ASSERT_FORMATTED_STR("%zu", sz_s2u(record->size)); + ASSERT_FORMATTED_STR("%zu", record->usize); ASSERT_CHAR(','); ASSERT_STR("\"released\""); @@ -505,12 +506,14 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { p = malloc(7); call_dump(); records[0].size = 7; + records[0].usize = sz_s2u(7); records[0].released = false; confirm_record(template, records, 1); - q = malloc(17); + q = mallocx(17, MALLOCX_ALIGN(128)); call_dump(); records[1].size = 17; + records[1].usize = sz_sa2u(17, 128); records[1].released = false; confirm_record(template, records, 2); -- cgit v0.12 From d243b4ec487224248172547643630f7a5fb5e84d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Aug 2020 14:53:08 -0700 Subject: Add PROFILING_INTERNALS.md This documents and explains some of the logic behind the profiling implementation. 
--- .gitignore | 2 + doc_internal/PROFILING_INTERNALS.md | 127 ++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 doc_internal/PROFILING_INTERNALS.md diff --git a/.gitignore b/.gitignore index 31cdbb8..0c3c040 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/doc_internal/PROFILING_INTERNALS.pdf + /jemalloc.pc /lib/ diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md new file mode 100644 index 0000000..0a9f31c --- /dev/null +++ b/doc_internal/PROFILING_INTERNALS.md @@ -0,0 +1,127 @@ +# jemalloc profiling +This describes the mathematical basis behind jemalloc's profiling implementation, as well as the implementation tricks that make it effective. Historically, the jemalloc profiling design simply copied tcmalloc's. The implementation has since diverged, due to both the desire to record additional information, and to correct some biasing bugs. + +Note: this document is markdown with embedded LaTeX; different markdown renderers may not produce the expected output. Viewing with `pandoc -s PROFILING_INTERNALS.md -o PROFILING_INTERNALS.pdf` is recommended. + +## Some tricks in our implementation toolbag + +### Sampling +Recording our metadata is quite expensive; we need to walk up the stack to get a stack trace. On top of that, we need to allocate storage to record that stack trace, and stick it somewhere where a profile-dumping call can find it. That call might happen on another thread, so we'll probably need to take a lock to do so. These costs are quite large compared to the average cost of an allocation. To manage this, we'll only sample some fraction of allocations. This will miss some of them, so our data will be incomplete, but we'll try to make up for it. We can tune our sampling rate to balance accuracy and performance. + +### Fast Bernoulli sampling +Compared to our fast paths, even a `coinflip(p)` function can be quite expensive. Having to do a random-number generation and some floating point operations would be a sizeable relative cost. However (as pointed out in [[Vitter, 1987](https://dl.acm.org/doi/10.1145/23002.23003)]), if we can orchestrate our algorithm so that many of our `coinflip` calls share their parameter value, we can do better. We can sample from the geometric distribution, and initialize a counter with the result. When the counter hits 0, the `coinflip` function returns true (and reinitializes its internal counter). +This can let us do a random-number generation once per (logical) coinflip that comes up heads, rather than once per (logical) coinflip. Since we expect to sample relatively rarely, this can be a large win. + +### Fast-path / slow-path thinking +Most programs have a skewed distribution of allocations. Smaller allocations are much more frequent than large ones, but shorter lived and less common as a fraction of program memory. "Small" and "large" are necessarily sort of fuzzy terms, but if we define "small" as "allocations jemalloc puts into slabs" and "large" as the others, then it's not uncommon for small allocations to be hundreds of times more frequent than large ones, but take up around half the amount of heap space as large ones. Moreover, small allocations tend to be much cheaper than large ones (often by a factor of 20-30): they're more likely to hit in thread caches, less likely to have to do an mmap, and cheaper to fill (by the user) once the allocation has been returned. 
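+
+A purely illustrative sketch of the counter-based sampling idea above, in the per-byte form that later sections argue for (this is not jemalloc's actual implementation; the names, state layout, and RNG here are invented for exposition):
+
+    #include <math.h>
+    #include <stdbool.h>
+    #include <stdint.h>
+    #include <stdlib.h>
+
+    /* Hypothetical per-thread counter: bytes left until the next sample.
+     * A real implementation would initialize it once per thread. */
+    static uint64_t bytes_until_sample = 0;
+
+    /* Draw a fresh waiting time (in bytes) with mean R, the sampling rate. */
+    static void
+    sample_interval_refresh(double R) {
+        double u = (rand() + 1.0) / ((double)RAND_MAX + 2.0); /* u in (0, 1) */
+        bytes_until_sample = (uint64_t)(-log(u) * R);
+    }
+
+    /* Called on every allocation; returns true iff it should be sampled.
+     * Only the rare sampled path pays for a random draw and a log(); the
+     * common path is a single subtraction. */
+    static bool
+    should_sample(size_t alloc_size, double R) {
+        if (alloc_size < bytes_until_sample) {
+            bytes_until_sample -= alloc_size;
+            return false;
+        }
+        sample_interval_refresh(R);
+        return true;
+    }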
+ +## An unbiased estimator of space consumption from (almost) arbitrary sampling strategies +Suppose we have a sampling strategy that meets the following criteria: + + - One allocation being sampled is independent of other allocations being sampled. + - Each allocation has a non-zero probability of being sampled. + +We can then estimate the bytes in live allocations through some particular stack trace as: + +$$ \sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]} $$ + +where the sum ranges over some index variable of live allocations from that stack, $S_i$ is the size of the $i$'th allocation, and $I_i$ is an indicator random variable for whether or not the $i'th$ allocation is sampled. $S_i$ and $\mathrm{E}[I_i]$ are constants (the program allocations are fixed; the random variables are the sampling decisions), so taking the expectation we get + +$$ \sum_i S_i \mathrm{E}[I_i] \frac{1}{\mathrm{E}[I_i]}.$$ + +This is of course $\sum_i S_i$, as we want (and, a similar calculation could be done for allocation counts as well). +This is a fairly general strategy; note that while we require that sampling decisions be independent of one another's outcomes, they don't have to be independent of previous allocations, total bytes allocated, etc. You can imagine strategies that: + + - Sample allocations at program startup at a higher rate than subsequent allocations + - Sample even-indexed allocations more frequently than odd-indexed ones (so long as no allocation has zero sampling probability) + - Let threads declare themselves as high-sampling-priority, and sample their allocations at an increased rate. + +These can all be fit into this framework to give an unbiased estimator. + +## Evaluating sampling strategies +Not all strategies for picking allocations to sample are equally good, of course. Among unbiased estimators, the lower the variance, the lower the mean squared error. Using the estimator above, the variance is: + +$$ +\begin{aligned} +& \mathrm{Var}[\sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\ +=& \sum_i \mathrm{Var}[S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\ +=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{Var}[I_i] \\ +=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{Var}[I_i] \\ +=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{E}[I_i](1 - \mathrm{E}[I_i]) \\ +=& \sum_i S_i^2 \frac{1 - \mathrm{E}[I_i]}{\mathrm{E}[I_i]}. +\end{aligned} +$$ + +We can use this formula to compare various strategy choices. All else being equal, lower-variance strategies are better. + +## Possible sampling strategies +Because of the desire to avoid the fast-path costs, we'd like to use our Bernoulli trick if possible. There are two obvious counters to use: a coinflip per allocation, and a coinflip per byte allocated. + +### Bernoulli sampling per-allocation +An obvious strategy is to pick some large $N$, and give each allocation a $1/N$ chance of being sampled. This would let us use our Bernoulli-via-Geometric trick. Using the formula from above, we can compute the variance as: + +$$ \sum_i S_i^2 \frac{1 - \frac{1}{N}}{\frac{1}{N}} = (N-1) \sum_i S_i^2.$$ + +That is, an allocation of size $Z$ contributes a term of $(N-1)Z^2$ to the variance. + +### Bernoulli sampling per-byte +Another option we have is to pick some rate $R$, and give each byte a $1/R$ chance of being picked for sampling (at which point we would sample its contained allocation). 
The chance of an allocation of size $Z$ being sampled, then, is
+
+$$1-(1-\frac{1}{R})^{Z}$$
+
+and an allocation of size $Z$ contributes a term of
+
+$$Z^2 \frac{(1-\frac{1}{R})^{Z}}{1-(1-\frac{1}{R})^{Z}}.$$
+
+In practical settings, $R$ is large, and so this is well-approximated by
+
+$$Z^2 \frac{e^{-Z/R}}{1 - e^{-Z/R}} .$$
+
+Just to get a sense of the dynamics here, let's look at the behavior for various values of $Z$. When $Z$ is small relative to $R$, we can use $e^x \approx 1 + x$, and conclude that the variance contributed by a small-$Z$ allocation is around
+
+$$Z^2 \frac{1-Z/R}{Z/R} \approx RZ.$$
+
+When $Z$ is comparable to $R$, the variance term is near $Z^2$ (we have $\frac{e^{-Z/R}}{1 - e^{-Z/R}} = 1$ when $Z/R = \ln 2 \approx 0.693$). When $Z$ is large relative to $R$, the variance term goes to zero.
+
+## Picking a sampling strategy
+The fast-path/slow-path dynamics of allocation patterns point us towards the per-byte sampling approach:
+
+ - The quadratic increase in variance per allocation in the first approach is quite costly when heaps have a non-negligible portion of their bytes in those allocations, which is practically often the case.
+ - The Bernoulli-per-byte approach shifts more of its samples towards large allocations, which are already a slow-path.
+ - We drive several tickers (e.g. tcache gc) by bytes allocated, and report bytes-allocated as a user-visible statistic, so we have to do all the necessary bookkeeping anyways.
+
+Indeed, this is the approach we use in jemalloc. Our heap dumps record the size of the allocation and the sampling rate $R$, and jeprof unbiases by dividing by $1 - e^{-Z/R}$. The framework above would suggest dividing by $1-(1-1/R)^Z$; instead, we use the fact that $R$ is large in practical situations, and so $e^{-Z/R}$ is a good approximation (and faster to compute). (Equivalently, we may also see this as the factor that falls out from viewing sampling as a Poisson process directly).
+
+## Consequences for heap dump consumers
+Using this approach means that there are a few things users need to be aware of.
+
+### Stack counts are not proportional to allocation frequencies
+If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. Consider the case in which there are only two types of allocating call stacks in a program. Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent.
+
+### Aggregation must be done after unbiasing samples
+Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurrences of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A.
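+
+To make the pitfall concrete, here is a hypothetical post-processing sketch (not a real jemalloc or jeprof interface; the record format is invented) that unbiases each sampled record before aggregating:
+
+    #include <math.h>
+    #include <stddef.h>
+
+    /* One sampled allocation record from a heap dump: its size Z and the
+     * sampling rate R that was in effect when it was recorded. */
+    typedef struct {
+        double size;
+        double rate;
+    } sample_t;
+
+    /* Estimate the live bytes behind one stack's samples: divide each
+     * record by its (approximate) sampling probability 1 - exp(-Z/R)
+     * first, and only then sum. Summing first and unbiasing afterwards
+     * gives the skewed numbers described above. */
+    static double
+    estimated_bytes(const sample_t *samples, size_t nsamples) {
+        double total = 0.0;
+        for (size_t i = 0; i < nsamples; i++) {
+            double z = samples[i].size;
+            double r = samples[i].rate;
+            total += z / (1.0 - exp(-z / r));
+        }
+        return total;
+    }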
+ +## An avenue for future exploration +While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice. +The real limiting factor in our ability to reduce estimator variance is that fact that sampling is expensive; we want to make sure we only do it on a small fraction of allocations. Our goal, then, is to pick the $p_Z$ to minimize variance given some maximum sampling rate $P$. If we define $a_Z$ to be the fraction of allocations of size $Z$, and $l_Z$ to be the fraction of allocations of size $Z$ still alive at the time of a heap dump, then we can phrase this as an optimization problem over the choices of $p_Z$: + +Minimize + +$$ \sum_Z Z^2 l_Z \frac{1-p_Z}{p_Z} $$ + +subject to + +$$ \sum_Z a_Z p_Z \leq P $$ + +Ignoring a term that doesn't depend on $p_Z$, the objective is minimized whenever + +$$ \sum_Z Z^2 l_Z \frac{1}{p_Z} $$ + +is. For a particular program, $l_Z$ and $a_Z$ are just numbers that can be obtained (exactly) from existing stats introspection facilities, and we have a fairly tractable convex optimization problem (it can be framed as a second-order cone program). It would be interesting to evaluate, for various common allocation patterns, how well our current strategy adapts. Do our actual choices for $p_Z$ closely correspond to the optimal ones? How close is the variance of our choices to the variance of the optimal strategy? +You can imagine an implementation that actually goes all the way, and makes $p_Z$ selections a tuning parameter. I don't think this is a good use of development time for the foreseeable future; but I do wonder about the answers to some of these questions. + +## Implementation realities + +The nice story above is at least partially a lie. Initially, jeprof (copying its logic from pprof) had the sum-then-unbias error described above. The current version of jemalloc does the unbiasing step on a per-allocation basis internally, so that we're always tracking what the unbiased numbers "should" be. The problem is, actually surfacing those unbiased numbers would require a breaking change to jeprof (and the various already-deployed tools that have copied its logic). Instead, we use a little bit more trickery. Since we know at dump time the numbers we want jeprof to report, we simply choose the values we'll output so that the jeprof numbers will match the true numbers. The math is described in `src/prof_data.c` (where the only cleverness is a change of variables that lets the exponentials fall out). + +This has the effect of making the output of jeprof (and related tools) correct, while making its inputs incorrect. This can be annoying to human readers of raw profiling dump output. -- cgit v0.12 From 1541ffc76571d8a2a0baad4a13a379305b7df5f2 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Wed, 9 Sep 2020 12:21:41 +0800 Subject: configure: add --with-lg-slab-maxregs configure option. Specify the maximum number of regions in a slab, which is ( - ) by default. This increases the limit of slab sizes specified by "slab_sizes" in malloc_conf. This should never be less than the default value. 
The max value of this option is related to LG_BITMAP_MAXBITS (see more in bitmap.h). For example, on a 4k page size system, if we: 1) configure jemalloc with with --with-lg-slab-maxregs=12. 2) export MALLOC_CONF="slab_sizes:9-16:4" The slab size of 16 bytes is set to 4 pages. Previously, the default lg-slab-maxregs is 9 (i.e. 12 - 3). The max slab size of 16 bytes is 2 pages (i.e. (1<<9) * 16 bytes). By increasing the value from 9 to 12, the max slab size can be set by MALLOC_CONF is 16 pages (i.e. (1<<12) * 16 bytes). --- INSTALL.md | 7 +++++++ configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/sc.h | 10 ++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index eb55acf..2aaa33e 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -250,6 +250,13 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). +* `--with-lg-slab-maxregs=` + + Specify the maximum number of regions in a slab, which is + ( - ) by default. This increases the limit of slab + sizes specified by "slab_sizes" in malloc_conf. This should never be less + than the default value. + * `--with-lg-vaddr=` Specify the number of significant virtual address bits. By default, the diff --git a/configure.ac b/configure.ac index d68d376..7c20302 100644 --- a/configure.ac +++ b/configure.ac @@ -1586,6 +1586,15 @@ if test "x$with_lg_quantum" != "x" ; then AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) fi +AC_ARG_WITH([lg_slab_maxregs], + [AS_HELP_STRING([--with-lg-slab-maxregs=], + [Base 2 log of maximum number of regions in a slab (used with malloc_conf slab_sizes)])], + [LG_SLAB_MAXREGS="with_lg_slab_maxregs"], + [LG_SLAB_MAXREGS=""]) +if test "x$with_lg_slab_maxregs" != "x" ; then + AC_DEFINE_UNQUOTED([LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) +fi + AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index ee052bb..7a4ebf1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -182,6 +182,9 @@ /* One page is 2^LG_PAGE bytes. */ #undef LG_PAGE +/* Maximum number of regions in a slab. */ +#undef LG_SLAB_MAXREGS + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 138da5c..133763d 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -270,8 +270,14 @@ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) /* Maximum number of regions in one slab. 
*/ -#define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#define SC_SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) +#ifndef LG_SLAB_MAXREGS +# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +#elif (LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)) +# error "Unsupported SC_LG_SLAB_MAXREGS" +#else +# define SC_LG_SLAB_MAXREGS LG_SLAB_MAXREGS +#endif +#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; -- cgit v0.12 From 36ebb5abe319d473c8535488e2dc1f4f0bc4e9d4 Mon Sep 17 00:00:00 2001 From: ezeeyahoo Date: Fri, 11 Sep 2020 13:37:10 +0530 Subject: CI support for PPC64LE architecture --- .travis.yml | 189 ++++++++++++++++++++++++++++++++++++++++++++++---- scripts/gen_travis.py | 43 ++++++++++-- 2 files changed, 213 insertions(+), 19 deletions(-) diff --git a/.travis.yml b/.travis.yml index 777aa3e..b61627b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,195 +4,360 @@ dist: precise matrix: include: - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: &gcc_multilib apt: packages: - gcc-multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 addons: *gcc_multilib + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" 
CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: *gcc_multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # Development build - os: linux diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index b46bd00..6832f91 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -36,8 +36,12 @@ MAX_UNUSUAL_OPTIONS = 2 os_default = 'linux' os_unusual = 'osx' +arch_default = 'amd64' +arch_unusual = 'ppc64le' + compilers_default = 'CC=gcc CXX=g++' compilers_unusual = 'CC=clang CXX=clang++' +compilers_ppc_default = 'CC=gcc-8 CXX=g++-8' compiler_flag_unusuals = ['-m32'] @@ -58,7 +62,7 @@ malloc_conf_unusuals = [ ] all_unusuals = ( - [os_unusual] + [compilers_unusual] + compiler_flag_unusuals + [os_unusual] + [arch_unusual] + [compilers_unusual] + compiler_flag_unusuals + configure_flag_unusuals + malloc_conf_unusuals ) @@ -67,13 +71,15 @@ for i in xrange(MAX_UNUSUAL_OPTIONS + 1): unusual_combinations_to_test += combinations(all_unusuals, i) gcc_multilib_set = False +gcc_ppc_set = False # Formats a job from a combination of flags def format_job(combination): global gcc_multilib_set + global gcc_ppc_set os = os_unusual if os_unusual in combination else os_default compilers = compilers_unusual if compilers_unusual in combination else compilers_default - + arch = arch_unusual if arch_unusual in combination else arch_default compiler_flags = [x for x in combination if x in compiler_flag_unusuals] configure_flags = [x for x in combination if x in configure_flag_unusuals] malloc_conf = [x for x in combination if x in malloc_conf_unusuals] @@ -90,14 +96,18 @@ def format_job(combination): if os == 'osx' and '--enable-prof' in configure_flags: return "" - # We get some spurious errors when -Warray-bounds is enabled. 
- env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( - compilers, " ".join(compiler_flags), " ".join(configure_flags)) + # Filter out unsupported OSX configuration on PPC64LE + if arch == 'ppc64le' and ( + os == 'osx' + or '-m32' in combination + or compilers_unusual in combination + ): + return "" job = "" job += ' - os: %s\n' % os - job += ' env: %s\n' % env_string + job += ' arch: %s\n' % arch + if '-m32' in combination and os == 'linux': job += ' addons:' if gcc_multilib_set: @@ -108,6 +118,25 @@ def format_job(combination): job += ' packages:\n' job += ' - gcc-multilib\n' gcc_multilib_set = True + + if arch == 'ppc64le': + job += ' addons:' + if gcc_ppc_set: + job += ' *gcc_ppc\n' + else: + job += ' &gcc_ppc\n' + job += ' apt:\n' + job += ' packages:\n' + job += ' - g++-8\n' + # Compilers overwritten for PPC64LE to gcc-8 + compilers = compilers_ppc_default + + # We get some spurious errors when -Warray-bounds is enabled. + env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' + 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( + compilers, " ".join(compiler_flags), " ".join(configure_flags)) + + job += ' env: %s\n' % env_string return job include_rows = "" -- cgit v0.12 From 40cf71a06d07faadc03b81f97697826c53b3fa62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 16 Sep 2020 14:03:59 -0700 Subject: Remove --with-slab-maxregs options from INSTALL.md The variable slab sizes feature is still experimental; we don't want people to start using it willy-nilly, or document its existence as a guarantee. --- INSTALL.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 2aaa33e..eb55acf 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -250,13 +250,6 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). -* `--with-lg-slab-maxregs=` - - Specify the maximum number of regions in a slab, which is - ( - ) by default. This increases the limit of slab - sizes specified by "slab_sizes" in malloc_conf. This should never be less - than the default value. - * `--with-lg-vaddr=` Specify the number of significant virtual address bits. By default, the -- cgit v0.12 From 7ad2f7866343265f570dc83b2f2df163ef0c03f9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 16 Sep 2020 15:19:06 -0700 Subject: Avoid a -Wundef warning on LG_SLAB_MAXREGS. 
--- configure.ac | 6 +++--- include/jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/sc.h | 12 +++++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 7c20302..d55c0b8 100644 --- a/configure.ac +++ b/configure.ac @@ -1589,10 +1589,10 @@ fi AC_ARG_WITH([lg_slab_maxregs], [AS_HELP_STRING([--with-lg-slab-maxregs=], [Base 2 log of maximum number of regions in a slab (used with malloc_conf slab_sizes)])], - [LG_SLAB_MAXREGS="with_lg_slab_maxregs"], - [LG_SLAB_MAXREGS=""]) + [CONFIG_LG_SLAB_MAXREGS="with_lg_slab_maxregs"], + [CONFIG_LG_SLAB_MAXREGS=""]) if test "x$with_lg_slab_maxregs" != "x" ; then - AC_DEFINE_UNQUOTED([LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) + AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) fi AC_ARG_WITH([lg_page], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7a4ebf1..7af28f7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -183,7 +183,7 @@ #undef LG_PAGE /* Maximum number of regions in a slab. */ -#undef LG_SLAB_MAXREGS +#undef CONFIG_LG_SLAB_MAXREGS /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 133763d..031ffff 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -270,15 +270,17 @@ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) /* Maximum number of regions in one slab. */ -#ifndef LG_SLAB_MAXREGS +#ifndef CONFIG_LG_SLAB_MAXREGS # define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#elif (LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)) -# error "Unsupported SC_LG_SLAB_MAXREGS" #else -# define SC_LG_SLAB_MAXREGS LG_SLAB_MAXREGS +# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) +# error "Unsupported SC_LG_SLAB_MAXREGS" +# else +# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS +# endif #endif -#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) +#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; struct sc_s { -- cgit v0.12 From e034500698fe74d4a82cf44131eda0110862f4e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 9 Jul 2020 18:07:17 -0700 Subject: Edata: rename "ranged" bit to "pai". This better represents its intended purpose; the hugepage allocator design evolved away from needing contiguity of hugepage virtual address space. --- include/jemalloc/internal/edata.h | 53 +++++++++++++++++++++++---------------- src/emap.c | 2 +- src/extent.c | 15 ++++++----- src/extent_dss.c | 6 ++--- test/unit/rtree.c | 8 +++--- test/unit/slab.c | 2 +- 6 files changed, 48 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index bb7da1d..f1ae56a 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -26,6 +26,16 @@ enum extent_head_state_e { }; typedef enum extent_head_state_e extent_head_state_t; +/* + * Which implementation of the page allocator interface, (PAI, defined in + * pai.h) owns the given extent? + */ +enum extent_pai_e { + EXTENT_PAI_PAC = 0, + EXTENT_PAI_HPA = 1 +}; +typedef enum extent_pai_e extent_pai_t; + struct e_prof_info_s { /* Time when this was allocated. 
*/ nstime_t e_prof_alloc_time; @@ -68,7 +78,7 @@ struct edata_s { * a: arena_ind * b: slab * c: committed - * r: ranged + * p: pai * z: zeroed * t: state * i: szind @@ -76,7 +86,7 @@ struct edata_s { * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zrcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -91,10 +101,7 @@ struct edata_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * - * ranged: Whether or not this extent is currently owned by the range - * allocator. This may be false even if the extent originally - * came from a range allocator; this indicates its *current* - * owner, not its original owner. + * pai: The pai flag is an extent_pai_t. * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. @@ -136,12 +143,12 @@ struct edata_s { #define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) #define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_RANGED_WIDTH 1 -#define EDATA_BITS_RANGED_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_RANGED_MASK MASK(EDATA_BITS_RANGED_WIDTH, EDATA_BITS_RANGED_SHIFT) +#define EDATA_BITS_PAI_WIDTH 1 +#define EDATA_BITS_PAI_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_RANGED_WIDTH + EDATA_BITS_RANGED_SHIFT) +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_WIDTH 2 @@ -291,10 +298,10 @@ edata_committed_get(const edata_t *edata) { EDATA_BITS_COMMITTED_SHIFT); } -static inline bool -edata_ranged_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_RANGED_MASK) >> - EDATA_BITS_RANGED_SHIFT); +static inline extent_pai_t +edata_pai_get(const edata_t *edata) { + return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >> + EDATA_BITS_PAI_SHIFT); } static inline bool @@ -488,9 +495,9 @@ edata_committed_set(edata_t *edata, bool committed) { } static inline void -edata_ranged_set(edata_t *edata, bool ranged) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_RANGED_MASK) | - ((uint64_t)ranged << EDATA_BITS_RANGED_SHIFT); +edata_pai_set(edata_t *edata, extent_pai_t pai) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) | + ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); } static inline void @@ -538,9 +545,8 @@ edata_is_head_set(edata_t *edata, bool is_head) { static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool ranged, extent_head_state_t is_head) { + bool committed, extent_pai_t pai, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - assert(ranged == false); edata_arena_ind_set(edata, arena_ind); edata_addr_set(edata, addr); @@ -551,7 +557,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_state_set(edata, state); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); - edata_ranged_set(edata, ranged); + edata_pai_set(edata, pai); 
edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); @@ -569,7 +575,12 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_ranged_set(edata, false); + /* + * This isn't strictly true, but base allocated extents never get + * deallocated and can't be looked up in the emap, but no sense in + * wasting a state bit to encode this fact. + */ + edata_pai_set(edata, EXTENT_PAI_PAC); } static inline int diff --git a/src/emap.c b/src/emap.c index ec1b4cd..4e7ca8d 100644 --- a/src/emap.c +++ b/src/emap.c @@ -249,7 +249,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, */ edata_t lead = {0}; edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, &prepare->lead_elm_a, &prepare->lead_elm_b); diff --git a/src/extent.c b/src/extent.c index 26a5c13..58ec820 100644 --- a/src/extent.c +++ b/src/extent.c @@ -86,7 +86,7 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, size, alignment, zero, &commit, false); - assert(edata == NULL || !edata_ranged_get(edata)); + assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } @@ -115,7 +115,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size, alignment, zero, &commit); } - assert(edata == NULL || !edata_ranged_get(edata)); + assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } @@ -124,7 +124,7 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(!edata_ranged_get(edata)); + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -650,7 +650,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, alloc_size, false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zeroed, committed, /* ranged */ false, + extent_state_active, zeroed, committed, EXTENT_PAI_PAC, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, pac, edata)) { @@ -790,7 +790,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zero, *commit, /* ranged */ false, + extent_state_active, zero, *commit, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -1026,7 +1026,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { - assert(!edata_ranged_get(edata)); + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1180,8 +1180,7 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, /* slab */ false, SC_NSIZES, 
edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), edata_ranged_get(edata), - EXTENT_NOT_HEAD); + edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); diff --git a/src/extent_dss.c b/src/extent_dss.c index 7427cd8..9857fd2 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -156,8 +156,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_addr_page, gap_size_page, false, SC_NSIZES, extent_sn_next( &arena->pa_shard.pac), - extent_state_active, false, true, false, - EXTENT_NOT_HEAD); + extent_state_active, false, true, + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -206,7 +206,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - false, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 63d6e37..775bc19 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -36,9 +36,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t edata_a = {0}, edata_b = {0}; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -93,7 +93,7 @@ TEST_BEGIN(test_rtree_bits) { edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -143,7 +143,7 @@ TEST_BEGIN(test_rtree_random) { edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 5ca8c44..6baa9d3 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_arena_slab_regind) { edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, false, + binind, 0, extent_state_active, false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); -- cgit v0.12 From ed99d300b93777787aad82549a4b0c4be129df35 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Sep 2020 18:12:06 -0700 Subject: Flat bitmap: Add longest-range computation. This will come in handy in the (upcoming) page-slab set assertions. 
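(Aside, not part of the patch: a quick sketch of what the two new queries report. This assumes the pre-existing `FB_NGROUPS` and `fb_init` helpers from `flat_bitmap.h`; it mirrors the `set_limit` case in the unit test below.)

```c
/* Illustrative only: longest set/unset runs after setting bits [0, 30). */
enum { NBITS = 100 };
fb_group_t fb[FB_NGROUPS(NBITS)];
fb_init(fb, NBITS);			/* all bits start unset */
fb_set_range(fb, NBITS, 0, 30);

assert(fb_srange_longest(fb, NBITS) == 30);		/* longest run of set bits */
assert(fb_urange_longest(fb, NBITS) == NBITS - 30);	/* longest run of unset bits */
```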
--- include/jemalloc/internal/flat_bitmap.h | 25 ++++++++++++++++++++ test/unit/flat_bitmap.c | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 7b894d5..0faf447 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -284,4 +284,29 @@ fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, /* val */ false, /* forward */ false); } +JEMALLOC_ALWAYS_INLINE size_t +fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { + size_t begin = 0; + size_t longest_len = 0; + size_t len = 0; + while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, + &len, val, /* forward */ true)) { + if (len > longest_len) { + longest_len = len; + } + begin += len; + } + return longest_len; +} + +static inline size_t +fb_srange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ true); +} + +static inline size_t +fb_urange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ false); +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 410e94f..2f360d3 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -301,6 +301,10 @@ TEST_BEGIN(test_empty_full) { } TEST_END +/* + * This tests both iter_range and the longest range functionality, which is + * built closely on top of it. + */ TEST_BEGIN(test_iter_range_simple) { size_t set_limit = 30; size_t nbits = 100; @@ -318,6 +322,10 @@ TEST_BEGIN(test_iter_range_simple) { /* A set of checks with only the first set_limit bits *set*. */ fb_set_range(fb, nbits, 0, set_limit); + expect_zu_eq(set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(nbits - set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); for (size_t i = 0; i < set_limit; i++) { result = fb_srange_iter(fb, nbits, i, &begin, &len); expect_true(result, "Should have found a range at %zu", i); @@ -360,6 +368,10 @@ TEST_BEGIN(test_iter_range_simple) { /* A set of checks with only the first set_limit bits *unset*. */ fb_unset_range(fb, nbits, 0, set_limit); fb_set_range(fb, nbits, set_limit, nbits - set_limit); + expect_zu_eq(nbits - set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); for (size_t i = 0; i < set_limit; i++) { result = fb_srange_iter(fb, nbits, i, &begin, &len); expect_true(result, "Should have found a range at %zu", i); @@ -436,6 +448,27 @@ fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, return false; } +/* Similar, but for finding longest ranges. 
*/ +static size_t +fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { + size_t longest_so_far = 0; + for (size_t begin = 0; begin < nbits; begin++) { + if (fb_get(fb, nbits, begin) != val) { + continue; + } + size_t end = begin + 1; + for (; end < nbits; end++) { + if (fb_get(fb, nbits, end) != val) { + break; + } + } + if (end - begin > longest_so_far) { + longest_so_far = end - begin; + } + } + return longest_so_far; +} + static void expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { @@ -487,6 +520,10 @@ expect_iter_results(fb_group_t *fb, size_t nbits) { expect_iter_results_at(fb, nbits, i, true, false); expect_iter_results_at(fb, nbits, i, true, true); } + expect_zu_eq(fb_range_longest_simple(fb, nbits, true), + fb_srange_longest(fb, nbits), "Longest range mismatch"); + expect_zu_eq(fb_range_longest_simple(fb, nbits, false), + fb_urange_longest(fb, nbits), "Longest range mismatch"); } static void @@ -527,6 +564,10 @@ do_test_iter_range_exhaustive(size_t nbits) { free(fb); } +/* + * Like test_iter_range_simple, this tests both iteration and longest-range + * computation. + */ TEST_BEGIN(test_iter_range_exhaustive) { #define NB(nbits) \ do_test_iter_range_exhaustive(nbits); -- cgit v0.12 From 018b162d673e64230b7d202075dca0e846e28e6a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Jul 2020 17:40:13 -0700 Subject: Add psset: a set of pageslabs. This introduces a new sort of edata_t; a pageslab, and a set to manage them. This is part of a series of a commits to implement a hugepage allocator; the pageset will be per-arena, and track small page allocations requests within a larger extent allocated from a centralized hugepage allocator. --- Makefile.in | 2 + include/jemalloc/internal/edata.h | 55 +++- include/jemalloc/internal/psset.h | 61 ++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/psset.c | 239 ++++++++++++++++ test/unit/psset.c | 306 +++++++++++++++++++++ 9 files changed, 670 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/psset.h create mode 100644 src/psset.c create mode 100644 test/unit/psset.c diff --git a/Makefile.in b/Makefile.in index 3697e07..4769d48 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,6 +136,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ $(srcroot)src/prof_sys.c \ + $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ @@ -239,6 +240,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ + $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index f1ae56a..4fee76b 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -202,7 +202,31 @@ struct edata_s { * This keeps the size of an edata_t at exactly 128 bytes on * architectures with 8-byte pointers and 4k pages. */ - void *reserved1, *reserved2; + void *reserved1; + union { + /* + * We could steal a low bit from these fields to indicate what + * sort of "thing" this is (a page slab, an object within a page + * slab, or a non-pageslab range). 
We don't do this yet, but it + * would enable some extra asserts. + */ + + /* + * If this edata is from an HPA, it may be part of some larger + * pageslab. Track it if so. Otherwise (either because it's + * not part of a pageslab, or not from the HPA at all), NULL. + */ + edata_t *ps; + /* + * If this edata *is* a pageslab, then it has some longest free + * range in it. Track it. + */ + struct { + uint32_t longest_free_range; + /* Not yet tracked. */ + /* uint32_t longest_free_range_pos; */ + }; + }; union { /* @@ -346,6 +370,18 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } +static inline edata_t * +edata_ps_get(const edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + return edata->ps; +} + +static inline uint32_t +edata_longest_free_range_get(const edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + return edata->longest_free_range; +} + static inline void * edata_before_get(const edata_t *edata) { return (void *)((uintptr_t)edata_base_get(edata) - PAGE); @@ -429,6 +465,19 @@ edata_bsize_set(edata_t *edata, size_t bsize) { } static inline void +edata_ps_set(edata_t *edata, edata_t *ps) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); + edata->ps = ps; +} + +static inline void +edata_longest_free_range_set(edata_t *edata, uint32_t longest_free_range) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA + || longest_free_range == 0); + edata->longest_free_range = longest_free_range; +} + +static inline void edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) | @@ -562,6 +611,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if (config_prof) { edata_prof_tctx_set(edata, NULL); } + edata_ps_set(edata, NULL); + edata_longest_free_range_set(edata, 0); } static inline void @@ -581,6 +632,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { * wasting a state bit to encode this fact. */ edata_pai_set(edata, EXTENT_PAI_PAC); + edata_ps_set(edata, NULL); + edata_longest_free_range_set(edata, 0); } static inline int diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h new file mode 100644 index 0000000..8f3f9ee --- /dev/null +++ b/include/jemalloc/internal/psset.h @@ -0,0 +1,61 @@ +#ifndef JEMALLOC_INTERNAL_PSSET_H +#define JEMALLOC_INTERNAL_PSSET_H + +/* + * A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains + * a collection of page-slabs (the intent being that they are backed by + * hugepages, or at least could be), and handles allocation and deallocation + * requests. + * + * It has the same synchronization guarantees as the eset; stats queries don't + * need any external synchronization, everything else does. + */ + +/* + * One more than the maximum pszind_t we will serve out of the HPA. + * Practically, we expect only the first few to be actually used. This + * corresponds to a maximum size of of 512MB on systems with 4k pages and + * SC_NGROUP == 4, which is already an unreasonably large maximum. Morally, you + * can think of this as being SC_NPSIZES, but there's no sense in wasting that + * much space in the arena, making bitmaps that much larger, etc. + */ +#define PSSET_NPSIZES 64 + +typedef struct psset_s psset_t; +struct psset_s { + /* + * The pageslabs, quantized by the size class of the largest contiguous + * free run of pages in a pageslab. 
+ */ + edata_heap_t pageslabs[PSSET_NPSIZES]; + bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; +}; + +void psset_init(psset_t *psset); + + +/* + * Tries to obtain a chunk from an existing pageslab already in the set. + * Returns true on failure. + */ +bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); + +/* + * Given a newly created pageslab ps (not currently in the set), pass ownership + * to the psset and allocate an extent from within it. The passed-in pageslab + * must be at least as big as size. + */ +void psset_alloc_new(psset_t *psset, edata_t *ps, + edata_t *r_edata, size_t size); + +/* + * Given an extent that comes from a pageslab in this pageslab set, returns it + * to its slab. Does not take ownership of the underlying edata_t. + * + * If some slab becomes empty as a result of the dalloc, it is retuend -- the + * result must be checked and deallocated to the central HPA. Otherwise returns + * NULL. + */ +edata_t *psset_dalloc(psset_t *psset, edata_t *edata); + +#endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index fe14779..3200eab 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 4b7b6ba..8d45980 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 6bd43c7..7badc63 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 4b7b6ba..8d45980 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/src/psset.c b/src/psset.c new file mode 100644 index 0000000..9675a0d --- /dev/null +++ b/src/psset.c @@ -0,0 +1,239 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/psset.h" + +#include "jemalloc/internal/flat_bitmap.h" + +static const bitmap_info_t psset_bitmap_info = + BITMAP_INFO_INITIALIZER(PSSET_NPSIZES); + +void +psset_init(psset_t *psset) { + for (unsigned i = 0; i < PSSET_NPSIZES; i++) { + edata_heap_new(&psset->pageslabs[i]); + } + bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); +} + +JEMALLOC_ALWAYS_INLINE void +psset_assert_ps_consistent(edata_t *ps) { + assert(fb_urange_longest(edata_slab_data_get(ps)->bitmap, + edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); +} + +/* + * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the + * set, picks one that can satisfy the allocation and remove it from the set. 
+ */ +static edata_t * +psset_recycle_extract(psset_t *psset, size_t size) { + pszind_t ret_ind; + edata_t *ret = NULL; + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + for (pszind_t i = (pszind_t)bitmap_ffu(psset->bitmap, + &psset_bitmap_info, (size_t)pind); + i < PSSET_NPSIZES; + i = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + (size_t)i + 1)) { + assert(!edata_heap_empty(&psset->pageslabs[i])); + edata_t *ps = edata_heap_first(&psset->pageslabs[i]); + if (ret == NULL || edata_snad_comp(ps, ret) < 0) { + ret = ps; + ret_ind = i; + } + } + if (ret == NULL) { + return NULL; + } + edata_heap_remove(&psset->pageslabs[ret_ind], ret); + if (edata_heap_empty(&psset->pageslabs[ret_ind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); + } + + psset_assert_ps_consistent(ret); + return ret; +} + +static void +psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { + psset_assert_ps_consistent(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + largest_range << LG_PAGE)); + + assert(pind < PSSET_NPSIZES); + + if (edata_heap_empty(&psset->pageslabs[pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } + edata_heap_insert(&psset->pageslabs[pind], ps); +} + +/* + * Given a pageslab ps and an edata to allocate size bytes from, initializes the + * edata with a range in the pageslab, and puts ps back in the set. + */ +static void +psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, + size_t size) { + size_t start = 0; + /* + * These are dead stores, but the compiler will issue warnings on them + * since it can't tell statically that found is always true below. + */ + size_t begin = 0; + size_t len = 0; + + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + + size_t npages = size >> LG_PAGE; + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + + size_t largest_unchosen_range = 0; + while (true) { + bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + &len); + /* + * A precondition to this function is that ps must be able to + * serve the allocation. + */ + assert(found); + if (len >= npages) { + /* + * We use first-fit within the page slabs; this gives + * bounded worst-case fragmentation within a slab. It's + * not necessarily right; we could experiment with + * various other options. + */ + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + uintptr_t addr = (uintptr_t)edata_base_get(ps) + begin * PAGE; + edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + fb_set_range(ps_fb, ps_npages, begin, npages); + /* + * OK, we've got to put the pageslab back. First we have to figure out + * where, though; we've only checked run sizes before the pageslab we + * picked. We also need to look for ones after the one we picked. Note + * that we want begin + npages as the start position, not begin + len; + * we might not have used the whole range. + * + * TODO: With a little bit more care, we can guarantee that the longest + * free range field in the edata is accurate upon entry, and avoid doing + * this check in the case where we're allocating from some smaller run. 
+ */ + start = begin + npages; + while (start < ps_npages) { + bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + &len); + if (!found) { + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); + if (largest_unchosen_range != 0) { + psset_insert(psset, ps, largest_unchosen_range); + } +} + +bool +psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { + edata_t *ps = psset_recycle_extract(psset, size); + if (ps == NULL) { + return true; + } + psset_ps_alloc_insert(psset, ps, r_edata, size); + return false; +} + +void +psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + assert(fb_empty(ps_fb, ps_npages)); + + assert(ps_npages >= (size >> LG_PAGE)); + psset_ps_alloc_insert(psset, ps, r_edata, size); +} + +edata_t * +psset_dalloc(psset_t *psset, edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_ps_get(edata) != NULL); + + edata_t *ps = edata_ps_get(edata); + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + size_t begin = + ((uintptr_t)edata_base_get(edata) - (uintptr_t)edata_base_get(ps)) + >> LG_PAGE; + size_t len = edata_size_get(edata) >> LG_PAGE; + fb_unset_range(ps_fb, ps_npages, begin, len); + + /* We might have just created a new, larger range. */ + size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); + size_t new_end = fb_ffs(ps_fb, ps_npages, begin + len - 1); + size_t new_range_len = new_end - new_begin; + /* + * If the new free range is no longer than the previous longest one, + * then the pageslab is non-empty and doesn't need to change bins. + * We're done, and don't need to return a pageslab to evict. + */ + if (new_range_len <= ps_old_longest_free_range) { + return NULL; + } + /* + * Otherwise, it might need to get evicted from the set, or change its + * bin. + */ + edata_longest_free_range_set(ps, (uint32_t)new_range_len); + /* + * If it was previously non-full, then it's in some (possibly now + * incorrect) bin already; remove it. + * + * TODO: We bailed out early above if we didn't expand the longest free + * range, which should avoid a lot of redundant remove/reinserts in the + * same bin. But it doesn't eliminate all of them; it's possible that + * we decreased the longest free range length, but only slightly, and + * not enough to change our pszind. We could check that more precisely. + * (Or, ideally, size class dequantization will happen at some point, + * and the issue becomes moot). + */ + if (ps_old_longest_free_range > 0) { + pszind_t old_pind = sz_psz2ind(sz_psz_quantize_floor( + ps_old_longest_free_range<< LG_PAGE)); + edata_heap_remove(&psset->pageslabs[old_pind], ps); + if (edata_heap_empty(&psset->pageslabs[old_pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, + (size_t)old_pind); + } + } + /* If the pageslab is empty, it gets evicted from the set. */ + if (new_range_len == ps_npages) { + return ps; + } + /* Otherwise, it gets reinserted. 
*/ + pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( + new_range_len << LG_PAGE)); + if (edata_heap_empty(&psset->pageslabs[new_pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, + (size_t)new_pind); + } + edata_heap_insert(&psset->pageslabs[new_pind], ps); + return NULL; +} diff --git a/test/unit/psset.c b/test/unit/psset.c new file mode 100644 index 0000000..8a5090d --- /dev/null +++ b/test/unit/psset.c @@ -0,0 +1,306 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/psset.h" + +#define PAGESLAB_PAGES 64 +#define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) +#define PAGESLAB_SN 123 +#define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) + +#define ALLOC_ARENA_IND 111 +#define ALLOC_ESN 222 + +static void +edata_init_test(edata_t *edata) { + memset(edata, 0, sizeof(*edata)); + edata_arena_ind_set(edata, ALLOC_ARENA_IND); + edata_esn_set(edata, ALLOC_ESN); +} + +static void +edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { + /* + * Note that allocations should get the arena ind of their home + * arena, *not* the arena ind of the pageslab allocator. + */ + expect_u_eq(ALLOC_ARENA_IND, edata_arena_ind_get(edata), + "Arena ind changed"); + expect_ptr_eq( + (void *)((uintptr_t)PAGESLAB_ADDR + (page_offset << LG_PAGE)), + edata_addr_get(edata), "Didn't allocate in order"); + expect_zu_eq(page_cnt << LG_PAGE, edata_size_get(edata), ""); + expect_false(edata_slab_get(edata), ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), + ""); + expect_zu_eq(0, edata_sn_get(edata), ""); + expect_d_eq(edata_state_get(edata), extent_state_active, ""); + expect_false(edata_zeroed_get(edata), ""); + expect_true(edata_committed_get(edata), ""); + expect_d_eq(EXTENT_PAI_HPA, edata_pai_get(edata), ""); + expect_false(edata_is_head_get(edata), ""); +} + +TEST_BEGIN(test_empty) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + edata_init_test(&alloc); + + psset_t psset; + psset_init(&psset); + + /* Empty psset should return fail allocations. */ + err = psset_alloc_reuse(&psset, &alloc, PAGE); + expect_true(err, "Empty psset succeeded in an allocation."); +} +TEST_END + +TEST_BEGIN(test_fill) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + edata_t *edata = &alloc[i]; + edata_expect(edata, i, 1); + } + + /* The pageslab, and thus psset, should now have no allocations. 
*/ + edata_t extra_alloc; + edata_init_test(&extra_alloc); + err = psset_alloc_reuse(&psset, &extra_alloc, PAGE); + expect_true(err, "Alloc succeeded even though psset should be empty"); +} +TEST_END + +TEST_BEGIN(test_reuse) { + bool err; + edata_t *ps; + + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + + /* Free odd indices. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i ++) { + if (i % 2 == 0) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* Realloc into them. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 2 == 0) { + continue; + } + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + edata_expect(&alloc[i], i, 1); + } + /* Now, free the pages at indices 0 or 1 mod 2. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 > 1) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* And realloc 2-page allocations into them. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 != 0) { + continue; + } + err = psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + edata_expect(&alloc[i], i, 2); + } + /* Free all the 2-page allocations. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 != 0) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* + * Free up a 1-page hole next to a 2-page hole, but somewhere in the + * middle of the pageslab. Index 11 should be right before such a hole + * (since 12 % 4 == 0). + */ + size_t index_of_3 = 11; + ps = psset_dalloc(&psset, &alloc[index_of_3]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + err = psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); + expect_false(err, "Should have been able to find alloc."); + edata_expect(&alloc[index_of_3], index_of_3, 3); + + /* Free up a 4-page hole at the end. */ + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 2]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + + /* Make sure we can satisfy an allocation at the very end of a slab. 
*/ + size_t index_of_4 = PAGESLAB_PAGES - 4; + ps = psset_dalloc(&psset, &alloc[index_of_4]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); + expect_false(err, "Should have been able to find alloc."); + edata_expect(&alloc[index_of_4], index_of_4, 4); +} +TEST_END + +TEST_BEGIN(test_evict) { + bool err; + edata_t *ps; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + psset_t psset; + psset_init(&psset); + + /* Alloc the whole slab. */ + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Unxpected allocation failure"); + } + + /* Dealloc the whole slab, going forwards. */ + for (size_t i = 0; i < PAGESLAB_PAGES - 1; i++) { + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); + + err = psset_alloc_reuse(&psset, &alloc[0], PAGE); + expect_true(err, "psset should be empty."); +} +TEST_END + +TEST_BEGIN(test_multi_pageslab) { + bool err; + edata_t *ps; + edata_t pageslab[2]; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[2][PAGESLAB_PAGES]; + + edata_init(&pageslab[0], /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + edata_init(&pageslab[1], /* arena_ind */ 0, + (void *)((uintptr_t)PAGESLAB_ADDR + PAGESLAB_SIZE), PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + /* Insert both slabs. */ + edata_init_test(&alloc[0][0]); + psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); + edata_init_test(&alloc[1][0]); + psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); + + /* Fill them both up; make sure we do so in first-fit order. */ + for (size_t i = 0; i < 2; i++) { + for (size_t j = 1; j < PAGESLAB_PAGES; j++) { + edata_init_test(&alloc[i][j]); + err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); + expect_false(err, + "Nonempty psset failed page allocation."); + assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), + "Didn't pick pageslabs in first-fit"); + } + } + + /* + * Free up a 2-page hole in the earlier slab, and a 1-page one in the + * later one. We should still pick the earlier slab for a 1-page + * allocation. + */ + ps = psset_dalloc(&psset, &alloc[0][0]); + expect_ptr_null(ps, "Unexpected eviction"); + ps = psset_dalloc(&psset, &alloc[0][1]); + expect_ptr_null(ps, "Unexpected eviction"); + ps = psset_dalloc(&psset, &alloc[1][0]); + expect_ptr_null(ps, "Unexpected eviction"); + err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); + expect_ptr_eq(&pageslab[0], edata_ps_get(&alloc[0][0]), + "Should have picked first pageslab"); + + /* + * Now both slabs have 1-page holes. Free up a second one in the later + * slab. 
+ */ + ps = psset_dalloc(&psset, &alloc[1][1]); + expect_ptr_null(ps, "Unexpected eviction"); + + /* + * We should be able to allocate a 2-page object, even though an earlier + * size class is nonempty. + */ + err = psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); + expect_false(err, "Allocation should have succeeded"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_empty, + test_fill, + test_reuse, + test_evict, + test_multi_pageslab); +} -- cgit v0.12 From 259c5e3e8f4731f2e32ceac71c66f4bc7d078145 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Sep 2020 12:59:10 -0700 Subject: psset: Add stats --- include/jemalloc/internal/edata.h | 12 ++++-- include/jemalloc/internal/psset.h | 16 ++++++++ src/psset.c | 85 +++++++++++++++++++++++++++++++++++---- test/unit/psset.c | 78 ++++++++++++++++++++++++++++++++++- 4 files changed, 178 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 4fee76b..f175af9 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -218,13 +218,17 @@ struct edata_s { */ edata_t *ps; /* - * If this edata *is* a pageslab, then it has some longest free - * range in it. Track it. + * If this edata *is* a pageslab, then we cache some useful + * information about its associated bitmap. */ struct { + /* + * The longest free range a pageslab contains determines + * the heap it lives in. If we know that it didn't + * change after an operation, we can avoid moving it + * between heaps. + */ uint32_t longest_free_range; - /* Not yet tracked. */ - /* uint32_t longest_free_range_pos; */ }; }; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 8f3f9ee..abbfc24 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -21,6 +21,16 @@ */ #define PSSET_NPSIZES 64 +typedef struct psset_bin_stats_s psset_bin_stats_t; +struct psset_bin_stats_s { + /* How many pageslabs are in this bin? */ + size_t npageslabs; + /* Of them, how many pages are active? */ + size_t nactive; + /* How many are inactive? */ + size_t ninactive; +}; + typedef struct psset_s psset_t; struct psset_s { /* @@ -29,6 +39,12 @@ struct psset_s { */ edata_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* + * Full slabs don't live in any edata heap. But we still track their + * stats. + */ + psset_bin_stats_t full_slab_stats; + psset_bin_stats_t slab_stats[PSSET_NPSIZES]; }; void psset_init(psset_t *psset); diff --git a/src/psset.c b/src/psset.c index 9675a0d..04d3548 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,6 +14,48 @@ psset_init(psset_t *psset) { edata_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + psset->full_slab_stats.npageslabs = 0; + psset->full_slab_stats.nactive = 0; + psset->full_slab_stats.ninactive = 0; + for (unsigned i = 0; i < PSSET_NPSIZES; i++) { + psset->slab_stats[i].npageslabs = 0; + psset->slab_stats[i].nactive = 0; + psset->slab_stats[i].ninactive = 0; + } +} + +/* + * The stats maintenance strategy is simple, but not necessarily obvious. + * edata_nfree and the bitmap must remain consistent at all times. If they + * change while an edata is within an edata_heap (or full), then the associated + * stats bin (or the full bin) must also change. If they change while not in a + * bin (say, in between extraction and reinsertion), then the bin stats need not + * change. 
If a pageslab is removed from a bin (or becomes nonfull), it should + * no longer contribute to that bin's stats (or the full stats). These help + * ensure we don't miss any heap modification operations. + */ +JEMALLOC_ALWAYS_INLINE void +psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { + size_t mul = inc ? (size_t)1 : (size_t)-1; + + size_t npages = edata_size_get(ps) >> LG_PAGE; + size_t ninactive = edata_nfree_get(ps); + size_t nactive = npages - ninactive; + binstats->npageslabs += mul * 1; + binstats->nactive += mul * nactive; + binstats->ninactive += mul * ninactive; +} + +static void +psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { + edata_heap_remove(&psset->pageslabs[pind], ps); + psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); +} + +static void +psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { + edata_heap_insert(&psset->pageslabs[pind], ps); + psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); } JEMALLOC_ALWAYS_INLINE void @@ -46,7 +88,8 @@ psset_recycle_extract(psset_t *psset, size_t size) { if (ret == NULL) { return NULL; } - edata_heap_remove(&psset->pageslabs[ret_ind], ret); + + psset_edata_heap_remove(psset, ret_ind, ret); if (edata_heap_empty(&psset->pageslabs[ret_ind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); } @@ -67,7 +110,7 @@ psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { if (edata_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - edata_heap_insert(&psset->pageslabs[pind], ps); + psset_edata_heap_insert(psset, pind, ps); } /* @@ -120,6 +163,9 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); fb_set_range(ps_fb, ps_npages, begin, npages); + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) - npages)); + /* The pageslab isn't in a bin, so no bin stats need to change. */ + /* * OK, we've got to put the pageslab back. 
First we have to figure out * where, though; we've only checked run sizes before the pageslab we @@ -144,7 +190,10 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, start = begin + len; } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); - if (largest_unchosen_range != 0) { + if (largest_unchosen_range == 0) { + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + } else { psset_insert(psset, ps, largest_unchosen_range); } } @@ -164,8 +213,8 @@ psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_npages = edata_size_get(ps) >> LG_PAGE; assert(fb_empty(ps_fb, ps_npages)); - assert(ps_npages >= (size >> LG_PAGE)); + edata_nfree_set(ps, (uint32_t)ps_npages); psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -177,6 +226,11 @@ psset_dalloc(psset_t *psset, edata_t *edata) { edata_t *ps = edata_ps_get(edata); fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + pszind_t old_pind = SC_NPSIZES; + if (ps_old_longest_free_range != 0) { + old_pind = sz_psz2ind(sz_psz_quantize_floor( + ps_old_longest_free_range << LG_PAGE)); + } size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t begin = @@ -184,6 +238,23 @@ psset_dalloc(psset_t *psset, edata_t *edata) { >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; fb_unset_range(ps_fb, ps_npages, begin, len); + if (ps_old_longest_free_range == 0) { + /* We were in the (imaginary) full bin; update stats for it. */ + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ false); + } else { + /* + * The edata is still in the bin, need to update its + * contribution. + */ + psset->slab_stats[old_pind].nactive -= len; + psset->slab_stats[old_pind].ninactive += len; + } + /* + * Note that we want to do this after the stats updates, since if it was + * full it psset_bin_stats_adjust would have looked at the old version. + */ + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); @@ -215,9 +286,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * and the issue becomes moot). 
*/ if (ps_old_longest_free_range > 0) { - pszind_t old_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_old_longest_free_range<< LG_PAGE)); - edata_heap_remove(&psset->pageslabs[old_pind], ps); + psset_edata_heap_remove(psset, old_pind, ps); if (edata_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); @@ -234,6 +303,6 @@ psset_dalloc(psset_t *psset, edata_t *edata) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } - edata_heap_insert(&psset->pageslabs[new_pind], ps); + psset_edata_heap_insert(psset, new_pind, ps); return NULL; } diff --git a/test/unit/psset.c b/test/unit/psset.c index 8a5090d..0bc4460 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -295,6 +295,81 @@ TEST_BEGIN(test_multi_pageslab) { } TEST_END +static void +stats_expect_empty(psset_bin_stats_t *stats) { + assert_zu_eq(0, stats->npageslabs, + "Supposedly empty bin had positive npageslabs"); + expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" + "Supposedly empty bin had positive nactive"); + expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" + "Supposedly empty bin had positive ninactive"); +} + +static void +stats_expect(psset_t *psset, size_t nactive) { + if (nactive == PAGESLAB_PAGES) { + expect_zu_eq(1, psset->full_slab_stats.npageslabs, + "Expected a full slab"); + expect_zu_eq(PAGESLAB_PAGES, psset->full_slab_stats.nactive, + "Should have exactly filled the bin"); + expect_zu_eq(0, psset->full_slab_stats.ninactive, + "Should never have inactive pages in a full slab"); + } else { + stats_expect_empty(&psset->full_slab_stats); + } + size_t ninactive = PAGESLAB_PAGES - nactive; + pszind_t nonempty_pind = PSSET_NPSIZES; + if (ninactive != 0 && ninactive < PAGESLAB_PAGES) { + nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( + ninactive << LG_PAGE)); + } + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + if (i == nonempty_pind) { + assert_zu_eq(1, psset->slab_stats[i].npageslabs, + "Should have found a slab"); + expect_zu_eq(nactive, psset->slab_stats[i].nactive, + "Mismatch in active pages"); + expect_zu_eq(ninactive, psset->slab_stats[i].ninactive, + "Mismatch in inactive pages"); + } else { + stats_expect_empty(&psset->slab_stats[i]); + } + } +} + +TEST_BEGIN(test_stats) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + stats_expect(&psset, 0); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + stats_expect(&psset, i); + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + stats_expect(&psset, PAGESLAB_PAGES); + for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { + edata_t *ps = psset_dalloc(&psset, &alloc[i]); + expect_true((ps == NULL) == (i != 0), + "psset_dalloc should only evict a slab on the last free"); + stats_expect(&psset, i); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -302,5 +377,6 @@ main(void) { test_fill, test_reuse, test_evict, - test_multi_pageslab); + test_multi_pageslab, + test_stats); } -- cgit v0.12 From f6bbfc1e965e3f165ea3bbdbc630d26778a7fbf4 Mon Sep 17 00:00:00 2001 From: David 
Goldblatt Date: Thu, 10 Sep 2020 16:01:23 -0700 Subject: Add a .clang-format file. --- .clang-format | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..719c03c --- /dev/null +++ b/.clang-format @@ -0,0 +1,122 @@ +# jemalloc targets clang-format version 8. We include every option it supports +# here, but comment out the ones that aren't relevant for us. +--- +# AccessModifierOffset: -2 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakBeforeMultilineStrings: true +# AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +# BreakAfterJavaFieldAnnotations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +# BreakConstructorInitializers: BeforeColon +# BreakInheritanceList: BeforeColon +BreakStringLiterals: false +ColumnLimit: 80 +# CommentPragmas: '' +# CompactNamespaces: true +# ConstructorInitializerAllOnOneLineOrOnePerLine: true +# ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: [ ql_foreach, qr_foreach, ] +# IncludeBlocks: Preserve +# IncludeCategories: +# - Regex: '^<.*\.h(pp)?>' +# Priority: 1 +# IncludeIsMainRegex: '' +IndentCaseLabels: false +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +# JavaImportGroups: [] +# JavaScriptQuotes: Leave +# JavaScriptWrapImports: True +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +# NamespaceIndentation: None +# ObjCBinPackProtocolList: Auto +# ObjCBlockIndentWidth: 2 +# ObjCSpaceAfterProperty: false +# ObjCSpaceBeforeProtocolList: false + +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +# PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +# RawStringFormats: +# - Language: TextProto +# Delimiters: +# - 'pb' +# - 'proto' +# EnclosingFunctions: +# - 'PARSE_TEXT_PROTO' +# BasedOnStyle: google +# - Language: Cpp +# Delimiters: +# - 'cc' +# - 'cpp' +# BasedOnStyle: llvm +# CanonicalDelimiter: 'cc' +ReflowComments: true +SortIncludes: false +SpaceAfterCStyleCast: false +# SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +# SpaceBeforeCpp11BracedList: false +# SpaceBeforeCtorInitializerColon: true +# SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +# SpaceBeforeRangeBasedForLoopColon: true 
+SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +# SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +# Standard: Cpp11 +# This is nominally supported in clang-format version 8, but not in the build +# used by some of the core jemalloc developers. +# StatementMacros: [] +TabWidth: 8 +UseTab: Never +... -- cgit v0.12 From 025d8c37c93a69ec0aa5d8a55e3793cb480a5ac8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 10 Sep 2020 17:21:32 -0700 Subject: Add a script to check for clang-formattedness. --- scripts/check-formatting.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100755 scripts/check-formatting.sh diff --git a/scripts/check-formatting.sh b/scripts/check-formatting.sh new file mode 100755 index 0000000..68cafd8 --- /dev/null +++ b/scripts/check-formatting.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# The files that need to be properly formatted. We'll grow this incrementally +# until it includes all the jemalloc source files (as we convert things over), +# and then just replace it with +# find -name '*.c' -o -name '*.h' -o -name '*.cpp +FILES=( +) + +if command -v clang-format &> /dev/null; then + CLANG_FORMAT="clang-format" +elif command -v clang-format-8 &> /dev/null; then + CLANG_FORMAT="clang-format-8" +else + echo "Couldn't find clang-format." +fi + +if ! $CLANG_FORMAT -version | grep "version 8\." &> /dev/null; then + echo "clang-format is the wrong version." + exit 1 +fi + +for file in ${FILES[@]}; do + if ! cmp --silent $file <($CLANG_FORMAT $file) &> /dev/null; then + echo "Error: $file is not clang-formatted" + exit 1 + fi +done -- cgit v0.12 From bdb60a8053dcac4eb39deaa17129b6e40ba6b17a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Oct 2020 18:31:55 -0700 Subject: Appveyor: don't update msys2 keyring. This is no longer required, and the step now fails. --- .appveyor.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f74f099..f44868d 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,8 +30,6 @@ environment: install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - - curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz - - pacman --noconfirm -U msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz --nodeps - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - pacman --noconfirm -S mingw-w64-%CPU%-make -- cgit v0.12 From 05130471701b7f42b545e2103f21fad61b67bfb0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 13 Aug 2020 18:02:25 -0700 Subject: PRNG: Allow a a range argument of 1. This is convenient when the range argument itself is generated from some computation whose value we don't know in advance. --- include/jemalloc/internal/prng.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 12380b4..a309e96 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -133,7 +133,9 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { uint32_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). 
*/ lg_range = ffs_u32(pow2_ceil_u32(range)); @@ -151,7 +153,9 @@ prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). */ lg_range = ffs_u64(pow2_ceil_u64(range)); @@ -169,7 +173,9 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { size_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). */ lg_range = ffs_u64(pow2_ceil_u64(range)); -- cgit v0.12 From 9e6aa77ab9d8dd5b00018bdca5adff23b03cbdb8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 09:17:11 -0700 Subject: PRNG: Remove atomic functionality. These had no uses and complicated the API. As a rule we now expect to only use thread-local randomization for contention-reduction reasons, so we only pay the API costs and never get the functionality benefits. --- include/jemalloc/internal/prng.h | 81 ++++++++++++++------------------------ test/unit/prng.c | 84 ++++++++++++++++++++-------------------- 2 files changed, 71 insertions(+), 94 deletions(-) diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index a309e96..14542aa 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_PRNG_H #define JEMALLOC_INTERNAL_PRNG_H -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" /* @@ -59,66 +58,38 @@ prng_state_next_zu(size_t state) { /* * The prng_lg_range functions give a uniform int in the half-open range [0, - * 2**lg_range). If atomic is true, they do so safely from multiple threads. - * Multithreaded 64-bit prngs aren't supported. + * 2**lg_range). */ JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { - uint32_t ret, state0, state1; - +prng_lg_range_u32(uint32_t *state, unsigned lg_range) { assert(lg_range > 0); assert(lg_range <= 32); - state0 = atomic_load_u32(state, ATOMIC_RELAXED); - - if (atomic) { - do { - state1 = prng_state_next_u32(state0); - } while (!atomic_compare_exchange_weak_u32(state, &state0, - state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); - } else { - state1 = prng_state_next_u32(state0); - atomic_store_u32(state, state1, ATOMIC_RELAXED); - } - ret = state1 >> (32 - lg_range); + *state = prng_state_next_u32(*state); + uint32_t ret = *state >> (32 - lg_range); return ret; } JEMALLOC_ALWAYS_INLINE uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range) { - uint64_t ret, state1; - assert(lg_range > 0); assert(lg_range <= 64); - state1 = prng_state_next_u64(*state); - *state = state1; - ret = state1 >> (64 - lg_range); + *state = prng_state_next_u64(*state); + uint64_t ret = *state >> (64 - lg_range); return ret; } JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { - size_t ret, state0, state1; - +prng_lg_range_zu(size_t *state, unsigned lg_range) { assert(lg_range > 0); assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - state0 = atomic_load_zu(state, ATOMIC_RELAXED); - - if (atomic) { - do { - state1 = prng_state_next_zu(state0); - } while (atomic_compare_exchange_weak_zu(state, &state0, - state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); - } else { - state1 = prng_state_next_zu(state0); - atomic_store_zu(state, state1, ATOMIC_RELAXED); - } - ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + *state = prng_state_next_zu(*state); + size_t ret = *state >> ((ZU(1) << (3 + 
LG_SIZEOF_PTR)) - lg_range); return ret; } @@ -129,20 +100,24 @@ prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { */ JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { - uint32_t ret; - unsigned lg_range; - +prng_range_u32(uint32_t *state, uint32_t range) { + assert(range != 0); + /* + * If range were 1, lg_range would be 0, so the shift in + * prng_lg_range_u32 would be a shift of a 32-bit variable by 32 bits, + * which is UB. Just handle this case as a one-off. + */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)); + unsigned lg_range = ffs_u32(pow2_ceil_u32(range)); /* Generate a result in [0..range) via repeated trial. */ + uint32_t ret; do { - ret = prng_lg_range_u32(state, lg_range, atomic); + ret = prng_lg_range_u32(state, lg_range); } while (ret >= range); return ret; @@ -150,17 +125,18 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { JEMALLOC_ALWAYS_INLINE uint64_t prng_range_u64(uint64_t *state, uint64_t range) { - uint64_t ret; - unsigned lg_range; + assert(range != 0); + /* See the note in prng_range_u32. */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)); + unsigned lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. */ + uint64_t ret; do { ret = prng_lg_range_u64(state, lg_range); } while (ret >= range); @@ -169,20 +145,21 @@ prng_range_u64(uint64_t *state, uint64_t range) { } JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { - size_t ret; - unsigned lg_range; +prng_range_zu(size_t *state, size_t range) { + assert(range != 0); + /* See the note in prng_range_u32. */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)); + unsigned lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. 
*/ + size_t ret; do { - ret = prng_lg_range_zu(state, lg_range, atomic); + ret = prng_lg_range_zu(state, lg_range); } while (ret >= range); return ret; diff --git a/test/unit/prng.c b/test/unit/prng.c index 915b350..baf43d9 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,34 +1,34 @@ #include "test/jemalloc_test.h" static void -test_prng_lg_range_u32(bool atomic) { - atomic_u32_t sa, sb; +test_prng_lg_range_u32() { + uint32_t sa, sb; uint32_t ra, rb; unsigned lg_range; - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); + sa = 42; + rb = prng_lg_range_u32(&sa, 32); expect_u32_eq(ra, rb, "Repeated generation should produce repeated results"); - atomic_store_u32(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sb, 32, atomic); + sb = 42; + rb = prng_lg_range_u32(&sb, 32); expect_u32_eq(ra, rb, "Equivalent generation should produce equivalent results"); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); - rb = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); + rb = prng_lg_range_u32(&sa, 32); expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); for (lg_range = 31; lg_range > 0; lg_range--) { - atomic_store_u32(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sb, lg_range, atomic); + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range); expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); expect_u32_eq(rb, (ra >> (32 - lg_range)), @@ -74,35 +74,35 @@ test_prng_lg_range_u64(void) { } static void -test_prng_lg_range_zu(bool atomic) { - atomic_zu_t sa, sb; +test_prng_lg_range_zu() { + size_t sa, sb; size_t ra, rb; unsigned lg_range; - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); + sa = 42; + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_eq(ra, rb, "Repeated generation should produce repeated results"); - atomic_store_zu(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sb = 42; + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; lg_range--) { - atomic_store_zu(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sb, lg_range, atomic); + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range); expect_zu_eq((rb & (SIZE_T_MAX << 
lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - @@ -112,12 +112,12 @@ test_prng_lg_range_zu(bool atomic) { } TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range_u32(false); + test_prng_lg_range_u32(); } TEST_END TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range_u32(true); + test_prng_lg_range_u32(); } TEST_END @@ -127,29 +127,29 @@ TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { TEST_END TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { - test_prng_lg_range_zu(false); + test_prng_lg_range_zu(); } TEST_END TEST_BEGIN(test_prng_lg_range_zu_atomic) { - test_prng_lg_range_zu(true); + test_prng_lg_range_zu(); } TEST_END static void -test_prng_range_u32(bool atomic) { +test_prng_range_u32() { uint32_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - atomic_u32_t s; + uint32_t s; unsigned rep; - atomic_store_u32(&s, range, ATOMIC_RELAXED); + s = range; for (rep = 0; rep < NREPS; rep++) { - uint32_t r = prng_range_u32(&s, range, atomic); + uint32_t r = prng_range_u32(&s, range); expect_u32_lt(r, range, "Out of range"); } @@ -177,19 +177,19 @@ test_prng_range_u64(void) { } static void -test_prng_range_zu(bool atomic) { +test_prng_range_zu() { size_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - atomic_zu_t s; + size_t s; unsigned rep; - atomic_store_zu(&s, range, ATOMIC_RELAXED); + s = range; for (rep = 0; rep < NREPS; rep++) { - size_t r = prng_range_zu(&s, range, atomic); + size_t r = prng_range_zu(&s, range); expect_zu_lt(r, range, "Out of range"); } -- cgit v0.12 From 2a6ba121b5d7f83498265c3a630ba65e08f4b7e7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 09:23:42 -0700 Subject: PRNG test: cleanups. Since we no longer have both atomic and non-atomic variants, there's no reason to try to test both. 
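For illustration, a minimal sketch of the resulting interface, written in the style of test/unit/prng.c (the test name is hypothetical, the seed is arbitrary, and this is not part of the patch): the PRNG state is now a plain thread-local integer rather than an atomic, and a range argument of 1 is accepted.

#include "test/jemalloc_test.h"

TEST_BEGIN(test_prng_usage_sketch) {
    /* Plain integer state; no atomics involved. */
    uint64_t state = 42;

    /* Uniform draw in [0, 2**10). */
    uint64_t a = prng_lg_range_u64(&state, 10);
    expect_u64_lt(a, 1024, "Out of range");

    /* Uniform draw in [0, 100), generated by repeated trial. */
    uint64_t b = prng_range_u64(&state, 100);
    expect_u64_lt(b, 100, "Out of range");

    /* A range of 1 is now allowed and always yields 0. */
    expect_u64_eq(0, prng_range_u64(&state, 1),
        "range == 1 must yield 0");
}
TEST_END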
--- test/unit/prng.c | 122 +++++++++++++++++-------------------------------------- 1 file changed, 37 insertions(+), 85 deletions(-) diff --git a/test/unit/prng.c b/test/unit/prng.c index baf43d9..a6d9b01 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -static void -test_prng_lg_range_u32() { +TEST_BEGIN(test_prng_lg_range_u32) { uint32_t sa, sb; uint32_t ra, rb; unsigned lg_range; @@ -35,10 +34,11 @@ test_prng_lg_range_u32() { "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } + } +TEST_END -static void -test_prng_lg_range_u64(void) { +TEST_BEGIN(test_prng_lg_range_u64) { uint64_t sa, sb, ra, rb; unsigned lg_range; @@ -72,9 +72,9 @@ test_prng_lg_range_u64(void) { "lg_range=%u", lg_range); } } +TEST_END -static void -test_prng_lg_range_zu() { +TEST_BEGIN(test_prng_lg_range_zu) { size_t sa, sb; size_t ra, rb; unsigned lg_range; @@ -109,129 +109,81 @@ test_prng_lg_range_zu() { lg_range)), "Expected high order bits of full-width " "result, lg_range=%u", lg_range); } -} - -TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range_u32(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range_u32(); -} -TEST_END -TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { - test_prng_lg_range_u64(); } TEST_END -TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { - test_prng_lg_range_zu(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_zu_atomic) { - test_prng_lg_range_zu(); -} -TEST_END - -static void -test_prng_range_u32() { +TEST_BEGIN(test_prng_range_u32) { uint32_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const uint32_t max_range = 10000000; + const uint32_t range_step = 97; + const unsigned nreps = 10; + + for (range = 2; range < max_range; range += range_step) { uint32_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { uint32_t r = prng_range_u32(&s, range); expect_u32_lt(r, range, "Out of range"); } } } +TEST_END -static void -test_prng_range_u64(void) { +TEST_BEGIN(test_prng_range_u64) { uint64_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const uint64_t max_range = 10000000; + const uint64_t range_step = 97; + const unsigned nreps = 10; + + for (range = 2; range < max_range; range += range_step) { uint64_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { uint64_t r = prng_range_u64(&s, range); expect_u64_lt(r, range, "Out of range"); } } } +TEST_END -static void -test_prng_range_zu() { +TEST_BEGIN(test_prng_range_zu) { size_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const size_t max_range = 10000000; + const size_t range_step = 97; + const unsigned nreps = 10; + + + for (range = 2; range < max_range; range += range_step) { size_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { size_t r = prng_range_zu(&s, range); expect_zu_lt(r, range, "Out of range"); } } } - -TEST_BEGIN(test_prng_range_u32_nonatomic) { - test_prng_range_u32(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_u32_atomic) { - test_prng_range_u32(true); -} -TEST_END - -TEST_BEGIN(test_prng_range_u64_nonatomic) { - test_prng_range_u64(); -} -TEST_END - 
-TEST_BEGIN(test_prng_range_zu_nonatomic) { - test_prng_range_zu(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_zu_atomic) { - test_prng_range_zu(true); -} TEST_END int main(void) { - return test( - test_prng_lg_range_u32_nonatomic, - test_prng_lg_range_u32_atomic, - test_prng_lg_range_u64_nonatomic, - test_prng_lg_range_zu_nonatomic, - test_prng_lg_range_zu_atomic, - test_prng_range_u32_nonatomic, - test_prng_range_u32_atomic, - test_prng_range_u64_nonatomic, - test_prng_range_zu_nonatomic, - test_prng_range_zu_atomic); + return test_no_reentrancy( + test_prng_lg_range_u32, + test_prng_lg_range_u64, + test_prng_lg_range_zu, + test_prng_range_u32, + test_prng_range_u64, + test_prng_range_zu); } -- cgit v0.12 From 1ed7ec369f44beeb2dcc0e2ca21d7e947d8dd1b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 28 Sep 2020 15:52:36 -0700 Subject: Emap: Add emap_assert_not_mapped. The counterpart to emap_assert_mapped, it lets callers check that some edata is not already in the emap. --- include/jemalloc/internal/emap.h | 9 +++++++++ src/emap.c | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9b92522..8b2c6ba 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -127,6 +127,15 @@ emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +/* Assert that the given edata isn't in the map. */ +void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +static inline void +emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (config_debug) { + emap_do_assert_not_mapped(tsdn, emap, edata); + } +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; diff --git a/src/emap.c b/src/emap.c index 4e7ca8d..537f588 100644 --- a/src/emap.c +++ b/src/emap.c @@ -323,3 +323,16 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)).edata == edata); } + +void +emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + emap_full_alloc_ctx_t context1 = {0}; + emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata), + &context1); + assert(context1.edata == NULL); + + emap_full_alloc_ctx_t context2 = {0}; + emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata), + &context2); + assert(context2.edata == NULL); +} -- cgit v0.12 From 21b70cb540e0f9ff7d7ff20fa21772e96c2215b0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 17:47:13 -0700 Subject: Add hpa_central module This will be the centralized component of the coming hugepage allocator; the source of larger chunks of memory from which smaller ones can be obtained. 
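For illustration, a minimal sketch of the intended call pattern, modeled on the unit test added below (the helper name and the fake extent address are hypothetical, and this is not part of the patch; in real use the grown extent would come from the OS via whatever growth policy the caller prefers):

#include "test/jemalloc_test.h"

#include "jemalloc/internal/hpa_central.h"

/*
 * Hypothetical helper showing the intended call sequence.  central must have
 * been set up with hpa_central_init(), and base must be the same base its
 * edata_cache was created from.
 */
static void
hpa_central_usage_sketch(tsdn_t *tsdn, hpa_central_t *central, base_t *base) {
    size_t size = 4 * PAGE;

    /* First, try to reuse address space central already owns. */
    edata_t *edata = hpa_central_alloc_reuse(tsdn, central, size, size);
    if (edata == NULL) {
        /*
         * Reuse failed; grow by handing central a fresh, head-bit-true
         * extent (faked here with an arbitrary address, as the unit
         * test does) and allocating out of it.  Any excess stays with
         * central for future reuse.
         */
        edata_t *to_add = base_alloc_edata(tsdn, base);
        if (to_add == NULL) {
            return;
        }
        edata_init(to_add, base_ind_get(base), (void *)(100 * PAGE),
            16 * PAGE, /* slab */ false, SC_NSIZES, /* sn */ 0,
            extent_state_active, /* zeroed */ true,
            /* committed */ true, EXTENT_PAI_HPA, /* is_head */ true);
        bool err = hpa_central_alloc_grow(tsdn, central, size, to_add);
        if (err) {
            return;
        }
        edata = to_add;
    }

    /* ... use the range described by edata, then hand it back. */
    hpa_central_dalloc(tsdn, central, edata);
}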
--- Makefile.in | 2 + include/jemalloc/internal/hpa_central.h | 47 +++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/extent.c | 9 + src/hpa_central.c | 206 ++++++++++ test/unit/hpa_central.c | 450 +++++++++++++++++++++ 9 files changed, 722 insertions(+) create mode 100644 include/jemalloc/internal/hpa_central.h create mode 100644 src/hpa_central.c create mode 100644 test/unit/hpa_central.c diff --git a/Makefile.in b/Makefile.in index 4769d48..ba0c80b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ + $(srcroot)src/hpa_central.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ @@ -210,6 +211,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h new file mode 100644 index 0000000..b90ca41 --- /dev/null +++ b/include/jemalloc/internal/hpa_central.h @@ -0,0 +1,47 @@ +#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H +#define JEMALLOC_INTERNAL_HPA_CENTRAL_H + +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/emap.h" + +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* The emap we use for metadata operations. */ + emap_t *emap; + + edata_cache_t *edata_cache; + eset_t eset; + + size_t sn_next; +}; + +void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, + emap_t *emap); +/* + * Tries to satisfy the given allocation request with an extent already given to + * central. + */ +edata_t *hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, + size_t size_min, size_t size_goal); +/* + * Adds the given edata to the central allocator as a new allocation. The + * intent is that after a reuse attempt fails, the caller can allocate a new + * extent using whatever growth policy it prefers and allocate from that, giving + * the excess to the hpa_central_t (this is analogous to the + * extent_grow_retained functionality; we can allocate address space in + * exponentially growing chunks). + * + * The edata_t should come from the same base that this hpa was initialized + * with. Only complete extents should be added (i.e. those for which the head + * bit is true, and for which their successor is either not owned by jemalloc + * or also has a head bit of true). It should be active, large enough to + * satisfy the requested allocation, and not already in the emap. + * + * If this returns true, then we did not accept the extent, and took no action. + * Otherwise, modifies *edata to satisfy the allocation. 
+ */ +bool hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, + size_t size, edata_t *to_add); +void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata); + +#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 3200eab..2dcc994 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 8d45980..81f3934 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 7badc63..fd814c3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 8d45980..81f3934 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/extent.c b/src/extent.c index 58ec820..e9c76eb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -809,6 +809,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, return false; } + /* + * We wouldn't really get into this situation because one or the other + * edata would have to have a head bit set to true, but this is + * conceptually correct and cheap. + */ + if (edata_pai_get(inner) != edata_pai_get(outer)) { + return false; + } + assert(edata_state_get(inner) == extent_state_active); if (edata_state_get(outer) != ecache->state) { return false; diff --git a/src/hpa_central.c b/src/hpa_central.c new file mode 100644 index 0000000..d106595 --- /dev/null +++ b/src/hpa_central.c @@ -0,0 +1,206 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_central.h" + +void +hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, + emap_t *emap) { + central->emap = emap; + central->edata_cache = edata_cache; + eset_init(¢ral->eset, extent_state_dirty); + central->sn_next = 0; +} + +/* + * Returns the trail, or NULL in case of failure (which can only occur in case + * of an emap operation failure; i.e. OOM). 
+ */ +static edata_t * +hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, + size_t size) { + edata_t *trail = edata_cache_get(tsdn, central->edata_cache); + if (trail == NULL) { + return NULL; + } + size_t cursize = edata_size_get(edata); + edata_init(trail, edata_arena_ind_get(edata), + (void *)((uintptr_t)edata_base_get(edata) + size), cursize - size, + /* slab */ false, SC_NSIZES, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + + emap_prepare_t prepare; + bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, + size, trail, cursize - size); + if (err) { + edata_cache_put(tsdn, central->edata_cache, trail); + return NULL; + } + emap_lock_edata2(tsdn, central->emap, edata, trail); + edata_size_set(edata, size); + emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, + cursize - size); + emap_unlock_edata2(tsdn, central->emap, edata, trail); + + return trail; +} + +edata_t * +hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, + size_t size_min, size_t size_goal) { + assert((size_min & PAGE_MASK) == 0); + assert((size_goal & PAGE_MASK) == 0); + + /* + * Fragmentation avoidance is more important in the HPA than giving the + * user their preferred amount of space, since we expect the average + * unused extent to be more costly (PAC extents can get purged away + * easily at any granularity; HPA extents are much more difficult to + * purge away if they get stranded). So we always search for the + * earliest (in first-fit ordering) extent that can satisfy the request, + * and use it, regardless of the goal size. + */ + edata_t *edata = eset_fit(¢ral->eset, size_min, PAGE, + /* exact_only */ false, /* lg_max_fit */ SC_PTR_BITS); + if (edata == NULL) { + return NULL; + } + + eset_remove(¢ral->eset, edata); + /* Maybe the first fit is also under the limit. */ + if (edata_size_get(edata) <= size_goal) { + goto label_success; + } + + /* Otherwise, split. */ + edata_t *trail = hpa_central_split(tsdn, central, edata, size_goal); + if (trail == NULL) { + eset_insert(¢ral->eset, edata); + return NULL; + } + eset_insert(¢ral->eset, trail); + +label_success: + emap_assert_mapped(tsdn, central->emap, edata); + assert(edata_size_get(edata) >= size_min); + /* + * We don't yet support purging in the hpa_central; everything should be + * dirty. + */ + assert(edata_state_get(edata) == extent_state_dirty); + assert(edata_base_get(edata) == edata_addr_get(edata)); + edata_state_set(edata, extent_state_active); + return edata; +} + +bool +hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, + size_t size, edata_t *edata) { + assert((size & PAGE_MASK) == 0); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_size_get(edata) >= size); + assert(edata_arena_ind_get(edata) + == base_ind_get(central->edata_cache->base)); + assert(edata_is_head_get(edata)); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_slab_get(edata) == false); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + + /* edata should be a new alloc, and hence not already mapped. */ + emap_assert_not_mapped(tsdn, central->emap, edata); + + size_t cursize = edata_size_get(edata); + + bool err = emap_register_boundary(tsdn, central->emap, edata, SC_NSIZES, + /* slab */ false); + if (err) { + return true; + } + /* No splitting is necessary. 
*/ + if (cursize == size) { + size_t sn = central->sn_next++; + edata_sn_set(edata, sn); + return false; + } + + /* We should split. */ + edata_t *trail = hpa_central_split(tsdn, central, edata, size); + if (trail == NULL) { + emap_deregister_boundary(tsdn, central->emap, NULL); + return true; + } + size_t sn = central->sn_next++; + edata_sn_set(edata, sn); + edata_sn_set(trail, sn); + + edata_state_set(trail, extent_state_dirty); + eset_insert(¢ral->eset, trail); + return false; +} + +static edata_t * +hpa_central_dalloc_get_merge_candidate(tsdn_t *tsdn, hpa_central_t *central, + void *addr) { + edata_t *edata = emap_lock_edata_from_addr(tsdn, central->emap, addr, + /* inactive_only */ true); + if (edata == NULL) { + return NULL; + } + extent_pai_t pai = edata_pai_get(edata); + extent_state_t state = edata_state_get(edata); + emap_unlock_edata(tsdn, central->emap, edata); + + if (pai != EXTENT_PAI_HPA) { + return NULL; + } + if (state == extent_state_active) { + return NULL; + } + + return edata; +} + +/* Merges b into a, freeing b back to the edata cache.. */ +static void +hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, + edata_t *b) { + emap_prepare_t prepare; + emap_merge_prepare(tsdn, central->emap, &prepare, a, b); + emap_lock_edata2(tsdn, central->emap, a, b); + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + emap_merge_commit(tsdn, central->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, central->emap, a, b); + edata_cache_put(tsdn, central->edata_cache, b); +} + +void +hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { + assert(edata_state_get(edata) == extent_state_active); + + /* + * These should really be called at the pa interface level, but + * currently they're not. + */ + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + if (!edata_is_head_get(edata)) { + edata_t *lead = hpa_central_dalloc_get_merge_candidate(tsdn, + central, edata_before_get(edata)); + if (lead != NULL) { + eset_remove(¢ral->eset, lead); + hpa_central_dalloc_merge(tsdn, central, lead, edata); + edata = lead; + } + } + edata_t *trail = hpa_central_dalloc_get_merge_candidate(tsdn, central, + edata_past_get(edata)); + if (trail != NULL && !edata_is_head_get(trail)) { + eset_remove(¢ral->eset, trail); + hpa_central_dalloc_merge(tsdn, central, edata, trail); + } + edata_state_set(edata, extent_state_dirty); + eset_insert(¢ral->eset, edata); +} diff --git a/test/unit/hpa_central.c b/test/unit/hpa_central.c new file mode 100644 index 0000000..f90b6e3 --- /dev/null +++ b/test/unit/hpa_central.c @@ -0,0 +1,450 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa_central.h" + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_central_t; + */ + hpa_central_t central; + base_t *base; + edata_cache_t edata_cache; + emap_t emap; +}; + +void +create_test_data(hpa_central_t **r_central, base_t **r_base) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ 111, + &ehooks_default_extent_hooks); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, + /* zeroed */ false); + assert_false(err, ""); + + hpa_central_init(&test_data->central, 
&test_data->edata_cache, + &test_data->emap); + + *r_central = (hpa_central_t *)test_data; + *r_base = base; +} + +static void +destroy_test_data(hpa_central_t *central) { + test_data_t *test_data = (test_data_t *)central; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static edata_t * +test_edata(base_t *base, uintptr_t addr, size_t size) { + edata_t *edata = base_alloc_edata(TSDN_NULL, base); + assert_ptr_not_null(edata, ""); + edata_init(edata, base_ind_get(base), (void *)addr, + size, /* slab */ false, /* szind_t */ SC_NSIZES, /* sn */ 0, + extent_state_active, /* zeroed */ true, /* comitted */ true, + EXTENT_PAI_HPA, /* is_head */ true); + return edata; +} + +static void +edata_expect_alloc(base_t *base, edata_t *edata, uintptr_t addr, size_t size) { + expect_ptr_not_null(edata, "Alloc should have succeeded"); + expect_u_eq(base_ind_get(base), edata_arena_ind_get(edata), ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); + expect_d_eq(extent_state_active, edata_state_get(edata), ""); + assert_ptr_eq((void *)addr, edata_base_get(edata), ""); + assert_zu_eq(size, edata_size_get(edata), ""); +} + + +TEST_BEGIN(test_empty) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata; + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 2 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 8 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, 8 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_first_fit_simple) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); + bool err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); + + edata_t *edata2 = test_edata(base, 4 * PAGE, 1 * PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, edata2, 4 * PAGE, PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata2); + + /* + * Even though there's a lower-addressed extent that a by-size search + * will find earlier, we should still pick the earlier one. + */ + edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); + /* + * Recall there's still an active page at the beginning of the extent + * added at 10 * PAGE; the next allocation from it should be at 11 * + * PAGE. + */ + edata_expect_alloc(base, edata3, 11 * PAGE, PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_first_fit_large_goal) { + /* + * See the comment in hpa_central_alloc_reuse; we should prefer an + * earlier allocation over a later one, even if it means we fall short + * of the goal size. 
+ */ + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); + bool err = hpa_central_alloc_grow(TSDN_NULL, central, 2 * PAGE, edata1); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); + + /* We need a page, but would like 2. */ + edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + 2 * PAGE); + edata_expect_alloc(base, edata2, 12 * PAGE, 2 * PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata1); + + /* + * Now, we have a 2-page inactive extent, then a 2-page active extent, + * then a 6-page inactive extent. If our minimum size is 2 but the goal + * size is 4, we should still pick the first hole rather than the + * second. + */ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 4 * PAGE); + edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); + + /* + * Make sure we didn't succeed only by forgetting about that last range + * or something. + */ + edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, + 4 * PAGE); + edata_expect_alloc(base, edata3, 14 * PAGE, 4 * PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_merging) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + /* Test an exact match */ + bool err; + edata_t *edata1 = test_edata(base, 10 * PAGE, PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); + expect_false(err, "Alloc should have succeeded"); + edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); + + edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + expect_ptr_null(edata2, "Allocation should have failed"); + + /* + * Create two more regions; one immediately before the first and one + * immediately after. The extents shouldn't get merged. + */ + edata2 = test_edata(base, 11 * PAGE, PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); + edata_expect_alloc(base, edata2, 11 * PAGE, PAGE); + + edata_t *edata3 = test_edata(base, 12 * PAGE, 20 * PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata3); + edata_expect_alloc(base, edata3, 12 * PAGE, PAGE); + + /* + * OK, we've got 3 contiguous ranges; [10, 11), [11, 12), and [12, 22). + * They shouldn't get merged though, even once freed. We free the + * middle range last to test merging (or rather, the lack thereof) in + * both directions. + */ + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata2); + + /* + * A two-page range should only be satisfied by the third added region. + */ + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, + 2 * PAGE); + edata_expect_alloc(base, edata, 12 * PAGE, 2 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata); + + /* Same with a three-page range. */ + edata = hpa_central_alloc_reuse(TSDN_NULL, central, 3 * PAGE, 3 * PAGE); + edata_expect_alloc(base, edata, 12 * PAGE, 3 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata); + + /* Let's try some cases that *should* get merged. 
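(The three regions above were each added by a separate hpa_central_alloc_grow call, so each is its own head extent, and hpa_central_dalloc never coalesces across a head boundary. The allocations below are all split out of the last region, so freeing them does allow coalescing.)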
*/ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); + edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); + edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); + + /* Merge with predecessor. */ + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata2); + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, + 4 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 4 * PAGE); + + /* Merge with successor */ + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata1); + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, + 6 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata1); + + /* + * Let's try merging with both. We need to get three adjacent + * allocations again; do it the same way as before. + */ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); + edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); + edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata2); + + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, + 6 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_stress_simple) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + enum { + range_base = 1024 * PAGE, + range_pages = 256, + range_size = range_pages * PAGE + }; + + edata_t *edatas[range_pages]; + + bool err; + edata_t *range = test_edata(base, range_base, range_size); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, range); + expect_false(err, "Unexpected grow failure"); + hpa_central_dalloc(TSDN_NULL, central, range); + + for (size_t i = 0; i < range_pages; i++) { + edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + edata_expect_alloc(base, edatas[i], range_base + i * PAGE, + PAGE); + } + /* Free up the odd indices. */ + for (size_t i = 0; i < range_pages; i++) { + if (i % 2 == 0) { + continue; + } + hpa_central_dalloc(TSDN_NULL, central, edatas[i]); + } + /* + * Reallocate them again. Try it with a goal size that can't be + * satisfied. + */ + for (size_t i = 0; i < range_pages; i++) { + if (i % 2 == 0) { + continue; + } + edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + edata_expect_alloc(base, edatas[i], range_base + i * PAGE, + PAGE); + } + /* + * In each batch of 8, create a free range of 4 pages and a free range + * of 2 pages. 
+ */ + for (size_t i = 0; i < range_pages; i += 8) { + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 1]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 2]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 3]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 4]); + + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 6]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 7]); + } + + /* + * And allocate 3 pages into the first, and 2 pages into the second. To + * mix things up a little, lets get those amounts via goal sizes + * instead. + */ + for (size_t i = 0; i < range_pages; i += 8) { + edatas[i + 1] = hpa_central_alloc_reuse(TSDN_NULL, central, + 2 * PAGE, 3 * PAGE); + edata_expect_alloc(base, edatas[i + 1], + range_base + (i + 1) * PAGE, 3 * PAGE); + + edatas[i + 6] = hpa_central_alloc_reuse(TSDN_NULL, central, + 2 * PAGE, 4 * PAGE); + edata_expect_alloc(base, edatas[i + 6], + range_base + (i + 6) * PAGE, 2 * PAGE); + } + + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, + 2 * PAGE); + expect_ptr_null(edata, "Should be no free ranges of 2 pages"); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_stress_random) { + const size_t range_length = 32 * PAGE; + const size_t range_base = 100 * PAGE; + const size_t size_max_pages = 16; + + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + /* + * We loop through this once per some operations, so we don't want it to + * get too big. + */ + const size_t nlive_edatas_max = 100; + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + size_t nranges = 0; + + /* + * Nothing special about this constant; we're only fixing it for + * consistency across runs. + */ + size_t prng_state = (size_t)0x76999ffb014df07c; + for (size_t i = 0; i < 100 * 1000; i++) { + size_t operation = prng_range_zu(&prng_state, 2); + if (operation == 0) { + /* Do an alloc. */ + if (nlive_edatas == nlive_edatas_max) { + continue; + } + size_t min_pages = 1 + prng_range_zu( + &prng_state, size_max_pages); + size_t goal_pages = min_pages + prng_range_zu( + &prng_state, size_max_pages - min_pages + 1); + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, + central, min_pages * PAGE, goal_pages * PAGE); + if (edata == NULL) { + edata = test_edata(base, + range_base + range_length * nranges, + range_length); + bool err = hpa_central_alloc_grow(TSDN_NULL, + central, goal_pages * PAGE, edata); + assert_false(err, "Unexpected grow failure"); + nranges++; + } + uintptr_t begin = (uintptr_t)edata_base_get(edata); + uintptr_t end = (uintptr_t)edata_last_get(edata); + size_t range_begin = (begin - range_base) / range_length; + size_t range_end = (end - range_base) / range_length; + expect_zu_eq(range_begin, range_end, + "Should not have allocations spanning " + "multiple ranges"); + expect_zu_ge(begin, range_base, + "Gave back a pointer outside of the reserved " + "range"); + expect_zu_lt(end, range_base + range_length * nranges, + "Gave back a pointer outside of the reserved " + "range"); + for (size_t j = 0; j < nlive_edatas; j++) { + edata_t *other = live_edatas[j]; + uintptr_t other_begin = + (uintptr_t)edata_base_get(other); + uintptr_t other_end = + (uintptr_t)edata_last_get(other); + expect_true( + (begin < other_begin && end < other_begin) + || (begin > other_end), + "Gave back two extents that overlap"); + } + live_edatas[nlive_edatas] = edata; + nlive_edatas++; + } else { + /* Do a free. 
*/ + if (nlive_edatas == 0) { + continue; + } + size_t victim = prng_range_zu(&prng_state, + nlive_edatas); + edata_t *to_free = live_edatas[victim]; + live_edatas[victim] = live_edatas[nlive_edatas - 1]; + nlive_edatas--; + hpa_central_dalloc(TSDN_NULL, central, to_free); + } + } + + free(live_edatas); + destroy_test_data(central); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_empty, + test_first_fit_simple, + test_first_fit_large_goal, + test_merging, + test_stress_simple, + test_stress_random); +} -- cgit v0.12 From b971f7c4dda04ba26f9fb52709c7153cef27021c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Oct 2020 17:39:01 -0700 Subject: Add "default" option to slab sizes. This comes in handy when overriding earlier settings to test alternate ones. We don't really include tests for this, but I claim that's OK here: - It's fairly straightforward - It's fairly hard to test well - This entire code path is undocumented and mostly for our internal experimentation in the first place. - I tested manually. --- src/jemalloc.c | 4 ++++ src/sc.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0ca400e..b21b2d9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1458,6 +1458,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { + if (CONF_MATCH_VALUE("default")) { + sc_data_init(sc_data); + CONF_CONTINUE; + } bool err; const char *slab_size_segment_cur = v; size_t vlen_left = vlen; diff --git a/src/sc.c b/src/sc.c index 1474eac..37683ff 100644 --- a/src/sc.c +++ b/src/sc.c @@ -257,8 +257,6 @@ size_classes( void sc_data_init(sc_data_t *sc_data) { - assert(!sc_data->initialized); - size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP); -- cgit v0.12 From a9aa6f6d0fd695d57a0fd1123da6099bb85132c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Oct 2020 16:11:51 -0700 Subject: Fix the alloc_ctx check in free_fastpath. The sanity check requires a functional TSD, which free_fastpath only guarantees after the threshold branch. Move the check function to afterwards. --- src/jemalloc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b21b2d9..bbf6255 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2972,11 +2972,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* This is a dead store, except when opt size checking is on. */ alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); @@ -2985,12 +2980,21 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. + * 0) in a single branch. Note that this handles the uninitialized case + * as well (TSD init will be triggered on the non-fastpath). Therefore + * anything depends on a functional TSD (e.g. the alloc_ctx sanity check + * below) needs to be after this branch. */ if (unlikely(deallocated_after >= threshold)) { return false; } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. 
*/ + return true; + } + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, /* slow */ false, /* is_alloc */ false); cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; -- cgit v0.12 From be9548f2bef30b75294fdd0eb6721d1bf6e6a56a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 13 Oct 2020 12:40:34 -0700 Subject: Tcaches: Fix a subtle race condition. Without a lock held continuously between checking tcaches_past and incrementing it, it's possible for two threads to go down manual creation path simultaneously. If the number of tcaches is one less than the maximum, it's possible for both to create a tcache and increment tcaches_past, with the second thread returning a value larger than TCACHES_MAX. --- src/tcache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index b681ee1..90ca372 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -767,7 +767,7 @@ static bool tcaches_create_prep(tsd_t *tsd, base_t *base) { bool err; - malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), base, @@ -785,7 +785,6 @@ tcaches_create_prep(tsd_t *tsd, base_t *base) { err = false; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); return err; } @@ -795,6 +794,8 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { bool err; + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + if (tcaches_create_prep(tsd, base)) { err = true; goto label_return; @@ -807,7 +808,6 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { } tcaches_t *elm; - malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches_avail != NULL) { elm = tcaches_avail; tcaches_avail = tcaches_avail->next; @@ -819,10 +819,10 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { *r_ind = tcaches_past; tcaches_past++; } - malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); err = false; label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); return err; } -- cgit v0.12 From 3de19ba401bd752af37e4f235878f764c8ba55fb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Oct 2020 16:45:19 -0700 Subject: Eagerly detect double free and sized dealloc bugs for large sizes. 
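The check piggybacks on the edata lookup that the large-deallocation paths perform anyway, so it is active whenever safety checks are compiled in (config_opt_safety_checks), with no extra runtime switch. A minimal sketch of the bug class it catches (illustrative only, not part of the patch; it assumes a build with safety checks enabled and a size big enough to bypass the tcache):

    #include <stdlib.h>

    int main(void) {
            void *p = malloc(1024 * 1024);  /* a large size class, not tcached by default */
            free(p);
            free(p);  /* the second free should be reported as an invalid deallocation */
            return 0;
    }

The same scenario is exercised more carefully in the new test/unit/double_free.c below.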
--- Makefile.in | 2 +- include/jemalloc/internal/arena_inlines_b.h | 35 +++++++++++++++++ src/jemalloc.c | 2 +- src/tcache.c | 4 ++ test/unit/double_free.c | 56 +++++++++++++++++++++++++++ test/unit/double_free.h | 1 + test/unit/size_check.c | 59 +++++++++++++++++++---------- 7 files changed, 136 insertions(+), 23 deletions(-) create mode 100644 test/unit/double_free.c create mode 100644 test/unit/double_free.h diff --git a/Makefile.in b/Makefile.in index ba0c80b..008cffd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -204,6 +204,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/double_free.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ @@ -308,7 +309,6 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c - TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 335c079..7971b4c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" @@ -203,6 +204,32 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return sz_index2size(full_alloc_ctx.szind); } +JEMALLOC_ALWAYS_INLINE bool +large_dalloc_safety_checks(edata_t *edata, szind_t szind) { + if (!config_opt_safety_checks) { + return false; + } + + /* + * Eagerly detect double free and sized dealloc bugs for large sizes. + * The cost is low enough (as edata will be accessed anyway) to be + * enabled all the time. + */ + if (unlikely(edata_state_get(edata) != extent_state_active)) { + safety_check_fail("Invalid deallocation detected: " + "pages being freed (%p) not currently active, " + "possibly caused by double free bugs.", + (uintptr_t)edata_addr_get(edata)); + return true; + } + if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true); + return true; + } + + return false; +} + static inline void arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { @@ -210,6 +237,10 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, szind)) { + /* See the comment in isfree. */ + return; + } large_dalloc(tsdn, edata); } } @@ -250,6 +281,10 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, szind)) { + /* See the comment in isfree. 
*/ + return; + } large_dalloc(tsdn, edata); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index bbf6255..1d6191a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2812,7 +2812,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* curent_dealloc */ true); + /* current_dealloc */ true); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/src/tcache.c b/src/tcache.c index 90ca372..06efe66 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -428,6 +428,10 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, dalloc_count++; } } else { + if (large_dalloc_safety_checks(edata, binind)) { + /* See the comment in isfree. */ + continue; + } large_dalloc_finish(tsdn, edata); } } diff --git a/test/unit/double_free.c b/test/unit/double_free.c new file mode 100644 index 0000000..73155b9 --- /dev/null +++ b/test/unit/double_free.c @@ -0,0 +1,56 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +void +test_large_double_free_pre(void) { + safety_check_set_abort(&fake_abort); + fake_abort_called = false; +} + +void +test_large_double_free_post() { + expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); + safety_check_set_abort(NULL); +} + +TEST_BEGIN(test_large_double_free_tcache) { + test_skip_if(!config_opt_safety_checks); + /* + * Skip debug builds, since too many assertions will be triggered with + * double-free before hitting the one we are interested in. + */ + test_skip_if(config_debug); + + test_large_double_free_pre(); + char *ptr = malloc(SC_LARGE_MINCLASS); + free(ptr); + free(ptr); + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + test_large_double_free_post(); +} +TEST_END + +TEST_BEGIN(test_large_double_free_no_tcache) { + test_skip_if(!config_opt_safety_checks); + test_skip_if(config_debug); + + test_large_double_free_pre(); + char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + dallocx(ptr, MALLOCX_TCACHE_NONE); + dallocx(ptr, MALLOCX_TCACHE_NONE); + test_large_double_free_post(); +} +TEST_END + +int +main(void) { + return test(test_large_double_free_no_tcache, + test_large_double_free_tcache); +} diff --git a/test/unit/double_free.h b/test/unit/double_free.h new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/test/unit/double_free.h @@ -0,0 +1 @@ + diff --git a/test/unit/size_check.c b/test/unit/size_check.c index 3d2912d..accdc40 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -8,48 +8,65 @@ void fake_abort(const char *message) { fake_abort_called = true; } -#define SIZE1 SC_SMALL_MAXCLASS -#define SIZE2 (SC_SMALL_MAXCLASS / 2) +#define SMALL_SIZE1 SC_SMALL_MAXCLASS +#define SMALL_SIZE2 (SC_SMALL_MAXCLASS / 2) -TEST_BEGIN(test_invalid_size_sdallocx) { - test_skip_if(!config_opt_size_checks); +#define LARGE_SIZE1 SC_LARGE_MINCLASS +#define LARGE_SIZE2 (LARGE_SIZE1 * 2) + +void * +test_invalid_size_pre(size_t sz) { safety_check_set_abort(&fake_abort); fake_abort_called = false; - void *ptr = malloc(SIZE1); + void *ptr = malloc(sz); assert_ptr_not_null(ptr, "Unexpected failure"); - sdallocx(ptr, SIZE2, 0); - expect_true(fake_abort_called, "Safety check didn't fire"); + return ptr; +} + +void +test_invalid_size_post(void) { + expect_true(fake_abort_called, "Safety check didn't fire"); safety_check_set_abort(NULL); } + 
+TEST_BEGIN(test_invalid_size_sdallocx) { + test_skip_if(!config_opt_size_checks); + + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + sdallocx(ptr, SMALL_SIZE2, 0); + test_invalid_size_post(); + + ptr = test_invalid_size_pre(LARGE_SIZE1); + sdallocx(ptr, LARGE_SIZE2, 0); + test_invalid_size_post(); +} TEST_END TEST_BEGIN(test_invalid_size_sdallocx_nonzero_flag) { test_skip_if(!config_opt_size_checks); - safety_check_set_abort(&fake_abort); - fake_abort_called = false; - void *ptr = malloc(SIZE1); - assert_ptr_not_null(ptr, "Unexpected failure"); - sdallocx(ptr, SIZE2, MALLOCX_TCACHE_NONE); - expect_true(fake_abort_called, "Safety check didn't fire"); + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + sdallocx(ptr, SMALL_SIZE2, MALLOCX_TCACHE_NONE); + test_invalid_size_post(); - safety_check_set_abort(NULL); + ptr = test_invalid_size_pre(LARGE_SIZE1); + sdallocx(ptr, LARGE_SIZE2, MALLOCX_TCACHE_NONE); + test_invalid_size_post(); } TEST_END TEST_BEGIN(test_invalid_size_sdallocx_noflags) { test_skip_if(!config_opt_size_checks); - safety_check_set_abort(&fake_abort); - fake_abort_called = false; - void *ptr = malloc(SIZE1); - assert_ptr_not_null(ptr, "Unexpected failure"); - je_sdallocx_noflags(ptr, SIZE2); - expect_true(fake_abort_called, "Safety check didn't fire"); + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + je_sdallocx_noflags(ptr, SMALL_SIZE2); + test_invalid_size_post(); - safety_check_set_abort(NULL); + ptr = test_invalid_size_pre(LARGE_SIZE1); + je_sdallocx_noflags(ptr, LARGE_SIZE2); + test_invalid_size_post(); } TEST_END -- cgit v0.12 From 5e41ff9b740258bddebcbd5575e1670a15f8b1ae Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Oct 2020 16:37:16 -0700 Subject: Add a hard limit on tcache max size class. For locality reasons, tcache bins are integrated in TSD. Allowing all size classes to be cached has little benefit, but takes up much thread local storage. In addition, it complicates the layout which we try hard to optimize. --- doc/jemalloc.xml.in | 4 ++-- include/jemalloc/internal/tcache_structs.h | 2 +- include/jemalloc/internal/tcache_types.h | 5 +++++ src/tcache.c | 12 ++++++------ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f283fd3..8e9a5d8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1314,8 +1314,8 @@ malloc_conf = "xmalloc:true";]]> Maximum size class (log base 2) to cache in the thread-specific cache (tcache). At a minimum, all small size classes - are cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15). + are cached; and at a maximum, size classes up to 8 MiB can be cached. + The default maximum is 32 KiB (2^15). diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 331bd24..176d73d 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -54,7 +54,7 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - cache_bin_t bins[SC_NSIZES]; + cache_bin_t bins[TCACHE_NBINS_MAX]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index fb311e7..583677e 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -27,4 +27,9 @@ typedef struct tcaches_s tcaches_t; /* Used for explicit tcache only. Means flushed but not destroyed. 
*/ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) +#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ +#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) +#define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ + (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index 06efe66..63eddc2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -936,20 +936,20 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. */ - if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < - SC_SMALL_MAXCLASS) { + tcache_maxclass = opt_lg_tcache_max < 0 ? 0 : + ZU(1) << opt_lg_tcache_max; + if (tcache_maxclass < SC_SMALL_MAXCLASS) { tcache_maxclass = SC_SMALL_MAXCLASS; - } else { - tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + } else if (tcache_maxclass > TCACHE_MAXCLASS_LIMIT) { + tcache_maxclass = TCACHE_MAXCLASS_LIMIT; } + nhbins = sz_size2index(tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { return true; } - nhbins = sz_size2index(tcache_maxclass) + 1; - /* Initialize tcache_bin_info. */ tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, nhbins * sizeof(cache_bin_info_t), CACHELINE); -- cgit v0.12 From 4ef5b8b4df3d4e2e534bbbdf558740f1056bc524 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 13 Oct 2020 15:18:35 -0700 Subject: Add a logo to doc_internal. This is the logo from the jemalloc development team's snazzy windbreakers. We don't actually use it in any documentation yet, but there's no reason we couldn't. In the meantime, it's probably best if it exists somewhere more stable than various email inboxes. 
--- doc_internal/jemalloc.svg | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc_internal/jemalloc.svg diff --git a/doc_internal/jemalloc.svg b/doc_internal/jemalloc.svg new file mode 100644 index 0000000..5e77327 --- /dev/null +++ b/doc_internal/jemalloc.svg @@ -0,0 +1 @@ +jemalloc Final Logo \ No newline at end of file -- cgit v0.12 From 5ba861715abde3a68f6ad73a54ccb41f39874ece Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 14 Oct 2020 11:02:39 -0700 Subject: Add thread name in prof last-N records --- src/prof_recent.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/prof_recent.c b/src/prof_recent.c index cfaa5a6..b1aeef3 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -484,6 +484,12 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, &node->alloc_tctx->thr_uid); + prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; + assert(alloc_tdata != NULL); + if (alloc_tdata->thread_name != NULL) { + emitter_json_kv(emitter, "alloc_thread_name", + emitter_type_string, &alloc_tdata->thread_name); + } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, &alloc_time_ns); @@ -494,6 +500,12 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { if (released && node->dalloc_tctx != NULL) { emitter_json_kv(emitter, "dalloc_thread_uid", emitter_type_uint64, &node->dalloc_tctx->thr_uid); + prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; + assert(dalloc_tdata != NULL); + if (dalloc_tdata->thread_name != NULL) { + emitter_json_kv(emitter, "dalloc_thread_name", + emitter_type_string, &dalloc_tdata->thread_name); + } assert(!nstime_equals_zero(&node->dalloc_time)); uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time); emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64, -- cgit v0.12 From c8209150f9d219a137412b06431c9d52839c7272 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 19 Oct 2020 22:48:26 -0700 Subject: Switch from opt.lg_tcache_max to opt.tcache_max Though for convenience, keep parsing lg_tcache_max. --- doc/jemalloc.xml.in | 16 ++++++++------ include/jemalloc/internal/tcache_externs.h | 2 +- src/ctl.c | 6 ++--- src/jemalloc.c | 35 +++++++++++++++++++++++------- src/tcache.c | 14 ++++-------- test/unit/mallctl.c | 2 +- test/unit/stats.c | 4 ++-- 7 files changed, 47 insertions(+), 32 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8e9a5d8..e5f2aa6 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1301,21 +1301,23 @@ malloc_conf = "xmalloc:true";]]> a certain size. Thread-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use. See the opt.lg_tcache_max + linkend="opt.tcache_max">opt.tcache_max option for related tuning information. This option is enabled by default. - + - opt.lg_tcache_max + opt.tcache_max (size_t) r- - Maximum size class (log base 2) to cache in the - thread-specific cache (tcache). At a minimum, all small size classes - are cached; and at a maximum, size classes up to 8 MiB can be cached. - The default maximum is 32 KiB (2^15). + Maximum size class to cache in the thread-specific cache + (tcache). At a minimum, all small size classes are cached; and at a + maximum, size classes up to 8 MiB can be cached. The default maximum is + 32 KiB (2^15). 
As a convenience, this may also be set by specifying + lg_tcache_max, which will be taken to be the base-2 logarithm of the + setting of tcache_max diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index f044d32..95f3a68 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; +extern size_t opt_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; diff --git a/src/ctl.c b/src/ctl.c index aec3473..db0e05f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -109,7 +109,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) CTL_PROTO(opt_tcache_nslots_small_max) CTL_PROTO(opt_tcache_nslots_large) @@ -362,7 +362,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, {NAME("tcache_nslots_small_max"), @@ -1837,7 +1837,7 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, unsigned) CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, diff --git a/src/jemalloc.c b/src/jemalloc.c index 1d6191a..170b172 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1170,15 +1170,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_DONT_CHECK_MAX(um, max) false #define CONF_CHECK_MAX(um, max) ((um) > (max)) +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) + #define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ max_t mv; \ - char *end; \ - \ - set_errno(0); \ - mv = (max_t)malloc_strtoumax(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ @@ -1379,8 +1382,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", - -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", + 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + if (CONF_MATCH("lg_tcache_max")) { + size_t m; + CONF_VALUE_READ(size_t, m) + if (CONF_VALUE_READ_FAIL()) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + /* clip if necessary */ + if (m > TCACHE_LG_MAXCLASS_LIMIT) { + m = TCACHE_LG_MAXCLASS_LIMIT; + } + 
opt_tcache_max = (size_t)1 << m; + } + CONF_CONTINUE; + } /* * Anyone trying to set a value outside -16 to 16 is * deeply confused. diff --git a/src/tcache.c b/src/tcache.c index 63eddc2..6bf1d30 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,11 +11,8 @@ bool opt_tcache = true; -/* - * (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. This choice - * (32kb by default) works well as a default in practice. - */ -ssize_t opt_lg_tcache_max = 15; +/* tcache_maxclass is set to 32KB by default. */ +size_t opt_tcache_max = ((size_t)1) << 15; /* Reasonable defaults for min and max values. */ unsigned opt_tcache_nslots_small_min = 20; @@ -935,14 +932,11 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { - /* If necessary, clamp opt_lg_tcache_max. */ - tcache_maxclass = opt_lg_tcache_max < 0 ? 0 : - ZU(1) << opt_lg_tcache_max; + tcache_maxclass = sz_s2u(opt_tcache_max); if (tcache_maxclass < SC_SMALL_MAXCLASS) { tcache_maxclass = SC_SMALL_MAXCLASS; - } else if (tcache_maxclass > TCACHE_MAXCLASS_LIMIT) { - tcache_maxclass = TCACHE_MAXCLASS_LIMIT; } + assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); nhbins = sz_size2index(tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3de5694..cf5c88e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -179,7 +179,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, always); TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); - TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); + TEST_MALLCTL_OPT(size_t, tcache_max, always); TEST_MALLCTL_OPT(const char *, thp, always); TEST_MALLCTL_OPT(const char *, zero_realloc, always); TEST_MALLCTL_OPT(bool, prof, prof); diff --git a/test/unit/stats.c b/test/unit/stats.c index 20a32dd..21a29a6 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -393,7 +393,7 @@ test_tcache_bytes_for_usize(size_t usize) { TEST_BEGIN(test_stats_tcache_bytes_small) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); - test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS); + test_skip_if(opt_tcache_max < SC_SMALL_MAXCLASS); test_tcache_bytes_for_usize(SC_SMALL_MAXCLASS); } @@ -402,7 +402,7 @@ TEST_END TEST_BEGIN(test_stats_tcache_bytes_large) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); - test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_LARGE_MINCLASS); + test_skip_if(opt_tcache_max < SC_LARGE_MINCLASS); test_tcache_bytes_for_usize(SC_LARGE_MINCLASS); } -- cgit v0.12 From 1c7da3331795970c6049e5b526637bf692a4243e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 13:36:41 -0700 Subject: HPA: Tie components into a PAI implementation. 
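The shard plugs into the page allocator through the pai_t vtable: hpa_shard_t embeds a pai_t as its first member, hpa_shard_init() fills in the alloc/expand/shrink/dalloc pointers, and each implementation casts back to the shard (hpa_from_pai). A rough sketch of the dispatch, using the names introduced in this patch (the pai_alloc() wrapper and the pa_shard call site are assumptions, not shown in this diff):

    /* Sketch only: how a page allocation reaches hpa_alloc() through pai_t. */
    static inline edata_t *
    pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) {
            return self->alloc(tsdn, self, size, alignment, zero);
    }

    edata_t *
    pa_alloc_from_hpa_sketch(tsdn_t *tsdn, pa_shard_t *shard, size_t size) {
            /*
             * For an HPA shard, self->alloc points at hpa_alloc(), which serves
             * small requests from pageslabs and larger ones from hpa_central.
             */
            return pai_alloc(tsdn, &shard->hpa_shard.pai, size, PAGE,
                /* zero */ false);
    }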
--- Makefile.in | 2 + include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/hpa.h | 92 +++++ .../jemalloc/internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/pa.h | 32 ++ include/jemalloc/internal/psset.h | 1 - include/jemalloc/internal/witness.h | 8 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 16 + src/ctl.c | 3 + src/hpa.c | 447 +++++++++++++++++++++ src/hpa_central.c | 2 + src/jemalloc.c | 42 ++ src/pa.c | 70 +++- src/pa_extra.c | 12 + src/stats.c | 1 + test/unit/arena_decay.c | 5 + test/unit/hpa.c | 235 +++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_gdump.c | 1 + test/unit/retained.c | 1 + test/unit/stats.c | 2 +- 25 files changed, 972 insertions(+), 11 deletions(-) create mode 100644 include/jemalloc/internal/hpa.h create mode 100644 src/hpa.c create mode 100644 test/unit/hpa.c diff --git a/Makefile.in b/Makefile.in index 008cffd..67568f0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ + $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ @@ -212,6 +213,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a2fdff9..9d4da31 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,6 +16,7 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; +extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h new file mode 100644 index 0000000..83f2203 --- /dev/null +++ b/include/jemalloc/internal/hpa.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_HPA_H +#define JEMALLOC_INTERNAL_HPA_H + +#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/pai.h" +#include "jemalloc/internal/psset.h" + +typedef struct hpa_s hpa_t; +struct hpa_s { + /* + * We have two mutexes for the central allocator; mtx protects its + * state, while grow_mtx protects controls the ability to grow the + * backing store. This prevents race conditions in which the central + * allocator has exhausted its memory while mutiple threads are trying + * to allocate. If they all reserved more address space from the OS + * without synchronization, we'd end consuming much more than necessary. + */ + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + hpa_central_t central; + /* The arena ind we're associated with. */ + unsigned ind; + /* + * This edata cache is the global one that we use for new allocations in + * growing; practically, it comes from a0. + */ + edata_cache_t *edata_cache; + geom_grow_t geom_grow; +}; + +typedef struct hpa_shard_s hpa_shard_t; +struct hpa_shard_s { + /* + * pai must be the first member; we cast from a pointer to it to a + * pointer to the hpa_shard_t. 
+ */ + pai_t pai; + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + /* + * This edata cache is the one we use when allocating a small extent + * from a pageslab. The pageslab itself comes from the centralized + * allocator, and so will use its edata_cache. + */ + edata_cache_t *edata_cache; + hpa_t *hpa; + psset_t psset; + + /* + * When we're grabbing a new ps from the central allocator, how big + * would we like it to be? This is mostly about the level of batching + * we use in our requests to the centralized allocator. + */ + size_t ps_goal; + /* + * What's the maximum size we'll try to allocate out of the psset? We + * don't want this to be too large relative to ps_goal, as a + * fragmentation avoidance measure. + */ + size_t ps_alloc_max; + /* The arena ind we're associated with. */ + unsigned ind; +}; + +bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, + edata_cache_t *edata_cache); +bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, + edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, + size_t ps_alloc_max); +void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * We share the fork ordering with the PA and arena prefork handling; that's why + * these are 2 and 3 rather than 0 or 1. + */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * These should be acquired after all the shard locks in phase 4, but before any + * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), + * so it needs to be lower in the witness ordering, but it's also logically + * global and not tied to any particular arena. + */ +void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); + +#endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3e7124d..c26153e 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -12,6 +12,7 @@ extern bool malloc_slow; extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; +extern bool opt_hpa; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f6d0a7c..7f73c27 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" @@ -66,12 +67,32 @@ struct pa_shard_s { */ atomic_zu_t nactive; + /* + * Whether or not we should prefer the hugepage allocator. Atomic since + * it may be concurrently modified by a thread setting extent hooks. + * Note that we still may do HPA operations in this arena; if use_hpa is + * changed from true to false, we'll free back to the hugepage allocator + * for those allocations. + */ + atomic_b_t use_hpa; + /* + * If we never used the HPA to begin with, it wasn't initialized, and so + * we shouldn't try to e.g. acquire its mutexes during fork. This + * tracks that knowledge. 
+ */ + bool ever_used_hpa; + /* Allocates from a PAC. */ pac_t pac; + /* Allocates from a HPA. */ + hpa_shard_t hpa_shard; + /* The source of edata_t objects. */ edata_cache_t edata_cache; + unsigned ind; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; @@ -99,6 +120,17 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); /* + * This isn't exposed to users; we allow late enablement of the HPA shard so + * that we can boot without worrying about the HPA, then turn it on in a0. + */ +bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa); +/* + * We stop using the HPA when custom extent hooks are installed, but still + * redirect deallocations to it. + */ +void pa_shard_disable_hpa(pa_shard_t *shard); + +/* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index abbfc24..72ff240 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -49,7 +49,6 @@ struct psset_s { void psset_init(psset_t *psset); - /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 652afe6..686bf40 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,8 +43,16 @@ enum witness_rank_e { WITNESS_RANK_CORE, WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_EXTENTS, + WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, + + WITNESS_RANK_HPA_GROW, + WITNESS_RANK_HPA, + WITNESS_RANK_EDATA_CACHE, WITNESS_RANK_EMAP, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2dcc994..46e497a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 81f3934..f46a92f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index fd814c3..dbf6f95 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 81f3934..f46a92f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index f8e8cba..74f90cc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; +hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1360,6 +1361,8 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, info = arena_background_thread_info_get(arena); 
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); } + /* No using the HPA now that we have the custom hooks. */ + pa_shard_disable_hpa(&arena->pa_shard); extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); if (have_background_thread) { malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -1516,6 +1519,19 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_init_update(&arena->create_time); + /* + * We turn on the HPA if set to. There are two exceptions: + * - Custom extent hooks (we should only return memory allocated from + * them in that case). + * - Arena 0 initialization. In this case, we're mid-bootstrapping, and + * so arena_hpa_global is not yet initialized. + */ + if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { + if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) { + goto label_error; + } + } + /* We don't support reentrancy for arena 0 bootstrapping. */ if (ind != 0) { /* diff --git a/src/ctl.c b/src/ctl.c index db0e05f..9e22e66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) +CTL_PROTO(opt_hpa) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -343,6 +344,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, + {NAME("hpa"), CTL(opt_hpa)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1816,6 +1818,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) +CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/hpa.c b/src/hpa.c new file mode 100644 index 0000000..842384b --- /dev/null +++ b/src/hpa.c @@ -0,0 +1,447 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa.h" + +#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/witness.h" + +static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool +hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { + bool err; + + /* + * We fundamentally rely on a address-space-hungry growth strategy for + * hugepages. This may change in the future, but for now we should have + * refused to turn on any HPA at a higher level of the stack. 
+ */ + assert(LG_SIZEOF_PTR == 3); + + err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + hpa_central_init(&hpa->central, edata_cache, emap); + if (err) { + return true; + } + hpa->ind = base_ind_get(base); + hpa->edata_cache = edata_cache; + + geom_grow_init(&hpa->geom_grow); + + return false; +} + +bool +hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, + unsigned ind, size_t ps_goal, size_t ps_alloc_max) { + bool err; + err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", + WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&shard->mtx, "hpa_shard", + WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + shard->edata_cache = edata_cache; + shard->hpa = hpa; + psset_init(&shard->psset); + shard->ps_goal = ps_goal; + shard->ps_alloc_max = ps_alloc_max; + + /* + * Fill these in last, so that if an hpa_shard gets used despite + * initialization failing, we'll at least crash instead of just + * operating on corrupted data. + */ + shard->pai.alloc = &hpa_alloc; + shard->pai.expand = &hpa_expand; + shard->pai.shrink = &hpa_shrink; + shard->pai.dalloc = &hpa_dalloc; + + shard->ind = ind; + assert(ind == base_ind_get(edata_cache->base)); + + return false; +} + +static edata_t * +hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, + size_t size_goal) { + bool err; + edata_t *edata; + + hpa_t *hpa = shard->hpa; + + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + edata_arena_ind_set(edata, shard->ind); + return edata; + } + /* No existing range can satisfy the request; try to grow. */ + malloc_mutex_lock(tsdn, &hpa->grow_mtx); + + /* + * We could have raced with other grow attempts; re-check to see if we + * did, and are now able to satisfy the request. + */ + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + return edata; + } + + /* + * No such luck. We've dropped mtx, so other allocations can proceed + * while we allocate the new extent. We know no one else will grow in + * the meantime, though, since we still hold grow_mtx. + */ + size_t alloc_size; + pszind_t skip; + + size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); + + err = geom_grow_size_prepare(&hpa->geom_grow, hugepage_goal_min, + &alloc_size, &skip); + if (err) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + alloc_size = HUGEPAGE_CEILING(alloc_size); + + /* + * Eventually, we need to think about this more systematically, and in + * terms of extent hooks. For now, though, we know we only care about + * overcommitting systems, and we're not going to purge much. + */ + bool commit = true; + void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); + if (addr == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + err = pages_huge(addr, alloc_size); + /* + * Ignore this for now; even if the allocation fails, the address space + * should still be usable. 
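(On Linux builds, pages_huge() just issues an MADV_HUGEPAGE hint, so a failure leaves ordinary base-page mappings behind rather than an unusable range.)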
+ */ + (void)err; + + edata = edata_cache_get(tsdn, hpa->edata_cache); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + pages_unmap(addr, alloc_size); + return NULL; + } + + /* + * The serial number here is just a placeholder; the hpa_central gets to + * decide how it wants to fill it in. + * + * The grow edata is associated with the hpa_central_t arena ind; the + * subsequent allocation we get (in the hpa_central_alloc_grow call + * below) will be filled in with the shard ind. + */ + edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, + /* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); + + malloc_mutex_lock(tsdn, &hpa->mtx); + /* Note that this replace edata with the allocation to return. */ + err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); + + if (!err) { + geom_grow_size_commit(&hpa->geom_grow, skip); + } + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + + if (err) { + pages_unmap(addr, alloc_size); + edata_cache_put(tsdn, hpa->edata_cache, edata); + return NULL; + } + + return edata; +} + +static edata_t * +hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { + assert(size < shard->ps_alloc_max); + + bool err; + edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); + if (edata == NULL) { + return NULL; + } + edata_arena_ind_set(edata, shard->ind); + + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + return edata; + } + /* Nothing in the psset works; we have to grow it. */ + malloc_mutex_lock(tsdn, &shard->grow_mtx); + + /* As above; check for grow races. */ + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; + } + + edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, + shard->ps_goal); + if (grow_edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + edata_cache_put(tsdn, shard->edata_cache, edata); + return NULL; + } + edata_arena_ind_set(grow_edata, shard->ind); + edata_slab_set(grow_edata, true); + fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; + fb_init(fb, shard->ps_goal / PAGE); + + /* We got the new edata; allocate from it. */ + malloc_mutex_lock(tsdn, &shard->mtx); + psset_alloc_new(&shard->psset, grow_edata, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; +} + +static hpa_shard_t * +hpa_from_pai(pai_t *self) { + assert(self->alloc = &hpa_alloc); + assert(self->expand = &hpa_expand); + assert(self->shrink = &hpa_shrink); + assert(self->dalloc = &hpa_dalloc); + return (hpa_shard_t *)self; +} + +static edata_t * +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + + assert((size & PAGE_MASK) == 0); + /* We don't handle alignment or zeroing for now. 
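+	 * Returning NULL for such requests is safe: pa_alloc (src/pa.c in
+	 * this patch) treats a NULL result from the HPA as "could not
+	 * serve" and falls back to the PAC.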
*/ + if (alignment > PAGE || zero) { + return NULL; + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + hpa_shard_t *shard = hpa_from_pai(self); + + edata_t *edata; + if (size <= shard->ps_alloc_max) { + edata = hpa_alloc_psset(tsdn, shard, size); + if (edata != NULL) { + emap_register_boundary(tsdn, shard->hpa->central.emap, + edata, SC_NSIZES, /* slab */ false); + } + } else { + edata = hpa_alloc_central(tsdn, shard, size, size); + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + if (edata != NULL) { + emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_base_get(edata) != NULL); + } + return edata; +} + +static bool +hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + /* Expand not yet supported. */ + return true; +} + +static bool +hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + /* Shrink not yet supported. */ + return true; +} + +static void +hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + hpa_t *hpa = shard->hpa; + + edata_arena_ind_set(edata, hpa->ind); + malloc_mutex_lock(tsdn, &hpa->mtx); + hpa_central_dalloc(tsdn, &hpa->central, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); +} + +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + hpa_shard_t *shard = hpa_from_pai(self); + + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) != NULL); + + /* + * There are two cases: + * - The psset field is NULL. In this case, the edata comes directly + * from the hpa_central_t and should be returned to it. + * - THe psset field is not NULL, in which case we return the edata to + * the appropriate slab (which may in turn cause it to become empty, + * triggering an eviction of the whole slab, which should then be + * returned to the hpa_central_t). + */ + if (edata_ps_get(edata) != NULL) { + emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + + edata_cache_put(tsdn, shard->edata_cache, edata); + + if (evicted_ps != NULL) { + /* + * The deallocation caused a pageslab to become empty. + * Free it back to the centralized allocator. + */ + bool err = emap_register_boundary(tsdn, + shard->hpa->central.emap, evicted_ps, SC_NSIZES, + /* slab */ false); + /* + * Registration can only fail on OOM, but the boundary + * mappings should have been initialized during + * allocation. 
+ */ + assert(!err); + edata_slab_set(evicted_ps, false); + edata_ps_set(evicted_ps, NULL); + + assert(edata_arena_ind_get(evicted_ps) == shard->ind); + hpa_dalloc_central(tsdn, shard, evicted_ps); + } + } else { + hpa_dalloc_central(tsdn, shard, edata); + } +} + +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs == 0); + assert(bin_stats->nactive == 0); + assert(bin_stats->ninactive == 0); +} + +void +hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { + /* + * By the time we're here, the arena code should have dalloc'd all the + * active extents, which means we should have eventually evicted + * everything from the psset, so it shouldn't be able to serve even a + * 1-page allocation. + */ + if (config_debug) { + edata_t edata = {0}; + malloc_mutex_lock(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, + PAGE); + malloc_mutex_unlock(tsdn, &shard->mtx); + assert(psset_empty); + hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &shard->psset.slab_stats[i]); + } + } +} + +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->grow_mtx); +} + +void +hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_child(tsdn, &shard->mtx); +} + +void +hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_prefork(tsdn, &hpa->grow_mtx); + malloc_mutex_prefork(tsdn, &hpa->mtx); +} + +void +hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &hpa->mtx); +} + +void +hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_child(tsdn, &hpa->mtx); +} diff --git a/src/hpa_central.c b/src/hpa_central.c index d106595..a1895c8 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -79,6 +79,7 @@ hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, eset_insert(¢ral->eset, edata); return NULL; } + emap_assert_mapped(tsdn, central->emap, trail); eset_insert(¢ral->eset, trail); label_success: @@ -178,6 +179,7 @@ hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); + assert(edata_ps_get(edata) == NULL); /* * These should really be called at the pa interface level, but diff --git a/src/jemalloc.c b/src/jemalloc.c index 170b172..0dc685b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -133,6 +133,10 @@ unsigned ncpus; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; + +/* The global hpa, and whether it's on. */ +bool opt_hpa = false; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -1476,6 +1480,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_max_background_threads, CONF_CHECK_MIN, CONF_CHECK_MAX, true); + CONF_HANDLE_BOOL(opt_hpa, "hpa") if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); @@ -1760,6 +1765,33 @@ malloc_init_hard_a0_locked() { return true; } a0 = arena_get(TSDN_NULL, 0, false); + + if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_abort_conf) { + malloc_printf(": Hugepages not currently " + "supported on 32-bit architectures; aborting."); + } else { + malloc_printf(": Hugepages not currently " + "supported on 32-bit architectures; disabling."); + opt_hpa = false; + } + } else if (opt_hpa) { + /* + * The global HPA uses the edata cache from a0, and so needs to + * be initialized specially, after a0 is. The arena init code + * handles this case specially, and does not turn on the HPA for + * a0 when opt_hpa is true. This lets us do global HPA + * initialization against a valid a0. + */ + if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, + &a0->pa_shard.edata_cache)) { + return true; + } + if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global)) { + return true; + } + } + malloc_init_state = malloc_init_a0_initialized; return false; @@ -4206,6 +4238,10 @@ _malloc_prefork(void) } } } + if (i == 3 && opt_hpa) { + hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global); + } + } prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); @@ -4244,6 +4280,9 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4274,6 +4313,9 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index f068fd9..672db7b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/hpa.h" + static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -21,12 +23,18 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } + if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } + shard->ind = ind; + + shard->ever_used_hpa = false; + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; @@ -39,6 +47,29 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return false; } +bool +pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa) { + /* + * These are constants for now; eventually they'll probably be + * tuneable. 
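+	 * (A later patch in this series, "HPA: Make slab sizes and maxes
+	 * configurable.", replaces these constants with the
+	 * opt_hpa_slab_goal and opt_hpa_slab_max_alloc options.)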
+ */ + size_t ps_goal = 512 * 1024; + size_t ps_alloc_max = 256 * 1024; + if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, + shard->ind, ps_goal, ps_alloc_max)) { + return true; + } + shard->ever_used_hpa = true; + atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED); + + return false; +} + +void +pa_shard_disable_hpa(pa_shard_t *shard) { + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); +} + void pa_shard_reset(pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); @@ -49,14 +80,30 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); } +static pai_t * +pa_get_pai(pa_shard_t *shard, edata_t *edata) { + return (edata_pai_get(edata) == EXTENT_PAI_PAC + ? &shard->pac.pai : &shard->hpa_shard.pai); +} + edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, - zero); + edata_t *edata = NULL; + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, + zero); + } + /* + * Fall back to the PAC if the HPA is off or couldn't serve the given + * allocation request. + */ + if (edata == NULL) { + edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); + } if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); @@ -67,6 +114,9 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, emap_register_interior(tsdn, shard->emap, edata, szind); } } + if (edata != NULL) { + assert(edata_arena_ind_get(edata) == shard->ind); + } return edata; } @@ -79,8 +129,9 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t expand_amount = new_size - old_size; - bool error = pai_expand(tsdn, &shard->pac.pai, edata, old_size, - new_size, zero); + pai_t *pai = pa_get_pai(shard, edata); + + bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero); if (error) { return true; } @@ -100,13 +151,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; *generated_dirty = false; - bool error = pai_shrink(tsdn, &shard->pac.pai, edata, old_size, - new_size); + pai_t *pai = pa_get_pai(shard, edata); + bool error = pai_shrink(tsdn, pai, edata, old_size, new_size); if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - *generated_dirty = true; + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); @@ -123,8 +174,9 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - pai_dalloc(tsdn, &shard->pac.pai, edata); - *generated_dirty = true; + pai_t *pai = pa_get_pai(shard, edata); + pai_dalloc(tsdn, pai, edata); + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); } bool diff --git a/src/pa_extra.c b/src/pa_extra.c index 8bf54b9..402603e 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,6 +17,9 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); + if (shard->ever_used_hpa) { + hpa_shard_prefork2(tsdn, &shard->hpa_shard); + } } void @@ -24,6 +27,9 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) 
{ ecache_prefork(tsdn, &shard->pac.ecache_dirty); ecache_prefork(tsdn, &shard->pac.ecache_muzzy); ecache_prefork(tsdn, &shard->pac.ecache_retained); + if (shard->ever_used_hpa) { + hpa_shard_prefork3(tsdn, &shard->hpa_shard); + } } void @@ -40,6 +46,9 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); + } } void @@ -51,6 +60,9 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_child(tsdn, &shard->hpa_shard); + } } void diff --git a/src/stats.c b/src/stats.c index 407b60c..b2ec57b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1095,6 +1095,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") + OPT_WRITE_BOOL("hpa") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 86f7057..555f71a 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -185,6 +185,7 @@ generate_dirty(unsigned arena_ind, size_t size) { TEST_BEGIN(test_decay_ticks) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); ticker_t *decay_ticker; unsigned tick0, tick1, arena_ind; @@ -424,6 +425,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, TEST_BEGIN(test_decay_ticker) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; ssize_t mdt = opt_muzzy_decay_ms; @@ -485,6 +487,7 @@ TEST_END TEST_BEGIN(test_decay_nonmonotonic) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -542,6 +545,7 @@ TEST_END TEST_BEGIN(test_decay_now) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(0, 0); expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); @@ -562,6 +566,7 @@ TEST_END TEST_BEGIN(test_decay_never) { test_skip_if(check_background_thread_enabled() || !config_stats); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/hpa.c b/test/unit/hpa.c new file mode 100644 index 0000000..8b319b9 --- /dev/null +++ b/test/unit/hpa.c @@ -0,0 +1,235 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" + +#define HPA_IND 111 +#define SHARD_IND 222 + +#define PS_GOAL (128 * PAGE) +#define PS_ALLOC_MAX (64 * PAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + base_t *shard_base; + edata_cache_t shard_edata_cache; + + hpa_t hpa; + base_t *hpa_base; + edata_cache_t hpa_edata_cache; + + emap_t emap; +}; + +static hpa_shard_t * 
+create_test_data() { + bool err; + base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(shard_base, ""); + + base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(hpa_base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->shard_base = shard_base; + test_data->hpa_base = hpa_base; + + err = edata_cache_init(&test_data->shard_edata_cache, shard_base); + assert_false(err, ""); + + err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->hpa_base, + /* zeroed */ false); + assert_false(err, ""); + + err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, + &test_data->hpa_edata_cache); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->hpa, + &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->shard_base); + base_delete(TSDN_NULL, test_data->hpa_base); + free(test_data); +} + +typedef struct mem_contents_s mem_contents_t; +struct mem_contents_s { + uintptr_t my_addr; + size_t size; + edata_t *my_edata; + rb_node(mem_contents_t) link; +}; + +static int +mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) { + return (a->my_addr > b->my_addr) - (a->my_addr < b->my_addr); +} + +typedef rb_tree(mem_contents_t) mem_tree_t; +rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, + mem_contents_cmp); + +static void +node_assert_ordered(mem_contents_t *a, mem_contents_t *b) { + assert_zu_lt(a->my_addr, a->my_addr + a->size, "Overflow"); + assert_zu_le(a->my_addr + a->size, b->my_addr, ""); +} + +static void +node_check(mem_tree_t *tree, mem_contents_t *contents) { + edata_t *edata = contents->my_edata; + assert_ptr_eq(contents, (void *)contents->my_addr, ""); + assert_ptr_eq(contents, edata_base_get(edata), ""); + assert_zu_eq(contents->size, edata_size_get(edata), ""); + assert_ptr_eq(contents->my_edata, edata, ""); + + mem_contents_t *next = mem_tree_next(tree, contents); + if (next != NULL) { + node_assert_ordered(contents, next); + } + mem_contents_t *prev = mem_tree_prev(tree, contents); + if (prev != NULL) { + node_assert_ordered(prev, contents); + } +} + +static void +node_insert(mem_tree_t *tree, edata_t *edata, size_t npages) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + contents->my_addr = (uintptr_t)edata_base_get(edata); + contents->size = edata_size_get(edata); + contents->my_edata = edata; + mem_tree_insert(tree, contents); + node_check(tree, contents); +} + +static void +node_remove(mem_tree_t *tree, edata_t *edata) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + node_check(tree, contents); + mem_tree_remove(tree, contents); +} + +TEST_BEGIN(test_stress) { + test_skip_if(LG_SIZEOF_PTR != 3); + + hpa_shard_t *shard = create_test_data(); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + const size_t nlive_edatas_max = 500; + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + /* + * Nothing special about this constant; we're only fixing it for + * consistency across runs. 
+ */ + size_t prng_state = (size_t)0x76999ffb014df07c; + + mem_tree_t tree; + mem_tree_new(&tree); + + for (size_t i = 0; i < 100 * 1000; i++) { + size_t operation = prng_range_zu(&prng_state, 4); + if (operation < 2) { + /* Alloc */ + if (nlive_edatas == nlive_edatas_max) { + continue; + } + + size_t npages_min; + size_t npages_max; + /* + * We make sure to get an even balance of small and + * large allocations. + */ + if (operation == 0) { + npages_min = 1; + npages_max = SC_LARGE_MINCLASS / PAGE - 1; + } else { + npages_min = SC_LARGE_MINCLASS / PAGE; + npages_max = 5 * npages_min; + } + size_t npages = npages_min + prng_range_zu(&prng_state, + npages_max - npages_min); + edata_t *edata = pai_alloc(tsdn, &shard->pai, + npages * PAGE, PAGE, false); + assert_ptr_not_null(edata, + "Unexpected allocation failure"); + live_edatas[nlive_edatas] = edata; + nlive_edatas++; + node_insert(&tree, edata, npages); + } else { + /* Free. */ + if (nlive_edatas == 0) { + continue; + } + size_t victim = prng_range_zu(&prng_state, nlive_edatas); + edata_t *to_free = live_edatas[victim]; + live_edatas[victim] = live_edatas[nlive_edatas - 1]; + nlive_edatas--; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + } + + size_t ntreenodes = 0; + for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; + contents = mem_tree_next(&tree, contents)) { + ntreenodes++; + node_check(&tree, contents); + } + expect_zu_eq(ntreenodes, nlive_edatas, ""); + + /* + * Test hpa_shard_destroy, which requires as a precondition that all its + * extents have been deallocated. + */ + for (size_t i = 0; i < nlive_edatas; i++) { + edata_t *to_free = live_edatas[i]; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + hpa_shard_destroy(tsdn, shard); + + free(live_edatas); + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + /* + * These trigger unused-function warnings on CI runs, even if declared + * with static inline. 
+ */ + (void)mem_tree_empty; + (void)mem_tree_last; + (void)mem_tree_search; + (void)mem_tree_nsearch; + (void)mem_tree_psearch; + (void)mem_tree_iter; + (void)mem_tree_reverse_iter; + (void)mem_tree_destroy; + return test_no_reentrancy( + test_stress); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cf5c88e..cda1a65 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,6 +163,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); + TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 9a47a19..46e4503 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -17,6 +17,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) { } TEST_BEGIN(test_gdump) { + test_skip_if(opt_hpa); bool active, gdump, gdump_old; void *p, *q, *r, *s; size_t sz; diff --git a/test/unit/retained.c b/test/unit/retained.c index 8139617..80ee8cd 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -99,6 +99,7 @@ thd_start(void *arg) { TEST_BEGIN(test_retained) { test_skip_if(!config_stats); + test_skip_if(opt_hpa); arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); diff --git a/test/unit/stats.c b/test/unit/stats.c index 21a29a6..6b6594d 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -119,7 +119,7 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - if (!background_thread_enabled()) { + if (!background_thread_enabled() && !opt_hpa) { expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } -- cgit v0.12 From bf025d2ec8f68fa50c5eb8bdb303a684c3f9c544 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 15:22:47 -0700 Subject: HPA: Make slab sizes and maxes configurable. This allows easy experimentation with them as tuning parameters. --- include/jemalloc/internal/jemalloc_internal_externs.h | 2 ++ include/jemalloc/internal/pa.h | 3 ++- src/arena.c | 3 ++- src/ctl.c | 6 ++++++ src/jemalloc.c | 16 +++++++++++++++- src/pa.c | 15 ++++++++------- src/stats.c | 2 ++ 7 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index c26153e..b152068 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -13,6 +13,8 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; extern bool opt_hpa; +extern size_t opt_hpa_slab_goal; +extern size_t opt_hpa_slab_max_alloc; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 7f73c27..eced8ca 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -123,7 +123,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. 
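 * (The global hpa_t borrows a0's edata cache, so it can only be initialized
 * once a0 exists; see malloc_init_hard_a0_locked in src/jemalloc.c.)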
*/ -bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa); +bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, + size_t ps_alloc_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/src/arena.c b/src/arena.c index 74f90cc..3403526 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1527,7 +1527,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) { + if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, + opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 9e22e66..fe6e844 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -91,6 +91,8 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) +CTL_PROTO(opt_hpa_slab_goal) +CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -345,6 +347,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, + {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, + {NAME("hpa_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1819,6 +1823,8 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) +CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0dc685b..fd822e0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -136,6 +136,8 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; +size_t opt_hpa_slab_goal = 512 * 1024; +size_t opt_hpa_slab_max_alloc = 256 * 1024; /* * Arenas that are used to service external requests. Not all elements of the @@ -1481,6 +1483,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") + /* + * If someone violates these mins and maxes, they're + * confused. 
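+	 * (At runtime these feed opt_hpa_slab_goal and
+	 * opt_hpa_slab_max_alloc; e.g. something like
+	 * MALLOC_CONF="hpa:true,hpa_slab_goal:524288" would be accepted.)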
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", + PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, + true) + CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, + "hpa_slab_max_alloc", PAGE, 512 * PAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true) + if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); @@ -1787,7 +1800,8 @@ malloc_init_hard_a0_locked() { &a0->pa_shard.edata_cache)) { return true; } - if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global)) { + if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, + opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { return true; } } diff --git a/src/pa.c b/src/pa.c index 672db7b..a8aa32d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -48,13 +48,14 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa) { - /* - * These are constants for now; eventually they'll probably be - * tuneable. - */ - size_t ps_goal = 512 * 1024; - size_t ps_alloc_max = 256 * 1024; +pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, + size_t ps_alloc_max) { + ps_goal &= ~PAGE_MASK; + ps_alloc_max &= ~PAGE_MASK; + + if (ps_alloc_max > ps_goal) { + ps_alloc_max = ps_goal; + } if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, shard->ind, ps_goal, ps_alloc_max)) { return true; diff --git a/src/stats.c b/src/stats.c index b2ec57b..78068f4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1096,6 +1096,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") + OPT_WRITE_SIZE_T("hpa_slab_goal") + OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") -- cgit v0.12 From 484f04733e5bd9908faf502fced6df66ca33f9f9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 15:35:10 -0700 Subject: HPA: Add central mutex contention stats. --- include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 190402e..91ab411 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,7 +11,9 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) + OP(prof_dump) \ + OP(hpa_central) \ + OP(hpa_central_grow) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index fe6e844..89b7545 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1142,6 +1142,14 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); } + if (opt_hpa) { + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_hpa_central, + arena_hpa_global.mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_hpa_central_grow, + arena_hpa_global.grow_mtx); + } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_background_thread, @@ -3134,6 +3142,10 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); } + if (opt_hpa) { + MUTEX_PROF_RESET(arena_hpa_global.mtx); + MUTEX_PROF_RESET(arena_hpa_global.grow_mtx); + } /* Per arena mutexes. 
*/ -- cgit v0.12 From 534504d4a7086084a46ac42c700e9429d2c72fd1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 18:29:28 -0700 Subject: HPA: add size-exclusion functionality. I.e. only allowing allocations under or over certain sizes. --- include/jemalloc/internal/hpa.h | 13 +++++++- .../jemalloc/internal/jemalloc_internal_externs.h | 2 ++ include/jemalloc/internal/pa.h | 2 +- src/arena.c | 3 +- src/ctl.c | 8 ++++- src/hpa.c | 13 +++++--- src/jemalloc.c | 11 +++++-- src/pa.c | 4 +-- src/stats.c | 2 ++ test/unit/hpa.c | 38 +++++++++++++++++++--- test/unit/mallctl.c | 4 +++ 11 files changed, 84 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 83f2203..3decbf1 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -59,6 +59,17 @@ struct hpa_shard_s { * fragmentation avoidance measure. */ size_t ps_alloc_max; + /* + * What's the maximum size we'll try to allocate out of the shard at + * all? + */ + size_t small_max; + /* + * What's the minimum size for which we'll go straight to the global + * arena? + */ + size_t large_min; + /* The arena ind we're associated with. */ unsigned ind; }; @@ -67,7 +78,7 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache); bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, - size_t ps_alloc_max); + size_t ps_alloc_max, size_t small_max, size_t large_min); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b152068..8faadaa 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -15,6 +15,8 @@ extern bool opt_confirm_conf; extern bool opt_hpa; extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; +extern size_t opt_hpa_small_max; +extern size_t opt_hpa_large_min; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index eced8ca..473d682 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -124,7 +124,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max); + size_t ps_alloc_max, size_t small_max, size_t large_min); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. 
diff --git a/src/arena.c b/src/arena.c index 3403526..5fb5843 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1528,7 +1528,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { + opt_hpa_slab_goal, opt_hpa_slab_max_alloc, + opt_hpa_small_max, opt_hpa_large_min)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 89b7545..9b8ab75 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -93,6 +93,8 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) +CTL_PROTO(opt_hpa_small_max) +CTL_PROTO(opt_hpa_large_min) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -348,7 +350,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, - {NAME("hpa_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, + {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1833,6 +1837,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) +CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/hpa.c b/src/hpa.c index 842384b..597261d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -50,7 +50,8 @@ hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, - unsigned ind, size_t ps_goal, size_t ps_alloc_max) { + unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, + size_t large_min) { bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); @@ -68,6 +69,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, psset_init(&shard->psset); shard->ps_goal = ps_goal; shard->ps_alloc_max = ps_alloc_max; + shard->small_max = small_max; + shard->large_min = large_min; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -195,7 +198,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size < shard->ps_alloc_max); + assert(size <= shard->ps_alloc_max); bool err; edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); @@ -257,16 +260,18 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); + hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. 
*/ if (alignment > PAGE || zero) { return NULL; } + if (size > shard->small_max && size < shard->large_min) { + return NULL; + } witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - hpa_shard_t *shard = hpa_from_pai(self); - edata_t *edata; if (size <= shard->ps_alloc_max) { edata = hpa_alloc_psset(tsdn, shard, size); diff --git a/src/jemalloc.c b/src/jemalloc.c index fd822e0..8ce9ca1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -136,8 +136,10 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_goal = 512 * 1024; +size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_slab_max_alloc = 256 * 1024; +size_t opt_hpa_small_max = 32 * 1024; +size_t opt_hpa_large_min = 4 * 1024 * 1024; /* * Arenas that are used to service external requests. Not all elements of the @@ -1493,6 +1495,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { @@ -1801,7 +1807,8 @@ malloc_init_hard_a0_locked() { return true; } if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { + opt_hpa_slab_goal, opt_hpa_slab_max_alloc, + opt_hpa_small_max, opt_hpa_large_min)) { return true; } } diff --git a/src/pa.c b/src/pa.c index a8aa32d..8e1ec84 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,7 +49,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max) { + size_t ps_alloc_max, size_t small_max, size_t large_min) { ps_goal &= ~PAGE_MASK; ps_alloc_max &= ~PAGE_MASK; @@ -57,7 +57,7 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, ps_alloc_max = ps_goal; } if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, - shard->ind, ps_goal, ps_alloc_max)) { + shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { return true; } shard->ever_used_hpa = true; diff --git a/src/stats.c b/src/stats.c index 78068f4..7cbf204 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1098,6 +1098,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_goal") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") + OPT_WRITE_SIZE_T("hpa_small_max") + OPT_WRITE_SIZE_T("hpa_large_min") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 8b319b9..b58dced 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -8,6 +8,9 @@ #define PS_GOAL (128 * PAGE) #define PS_ALLOC_MAX (64 * PAGE) +#define HPA_SMALL_MAX (200 * PAGE) +#define HPA_LARGE_MIN (300 * PAGE) + typedef struct test_data_s test_data_t; struct test_data_s { /* @@ -57,7 +60,8 @@ create_test_data() { assert_false(err, ""); err = hpa_shard_init(&test_data->shard, &test_data->hpa, - &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX); + &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX, + HPA_SMALL_MAX, HPA_LARGE_MIN); assert_false(err, ""); return (hpa_shard_t 
*)test_data; @@ -71,6 +75,31 @@ destroy_test_data(hpa_shard_t *shard) { free(test_data); } +TEST_BEGIN(test_small_max_large_min) { + test_skip_if(LG_SIZEOF_PTR != 3); + + hpa_shard_t *shard = create_test_data(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata; + + /* Small max */ + edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); + expect_ptr_not_null(edata, "Allocation of small max failed"); + edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); + expect_ptr_null(edata, "Allocation of larger than small max succeeded"); + + /* Large min */ + edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false); + expect_ptr_not_null(edata, "Allocation of large min failed"); + edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false); + expect_ptr_null(edata, + "Allocation of smaller than large min succeeded"); + + destroy_test_data(shard); +} +TEST_END + typedef struct mem_contents_s mem_contents_t; struct mem_contents_s { uintptr_t my_addr; @@ -164,10 +193,10 @@ TEST_BEGIN(test_stress) { */ if (operation == 0) { npages_min = 1; - npages_max = SC_LARGE_MINCLASS / PAGE - 1; + npages_max = HPA_SMALL_MAX / PAGE; } else { - npages_min = SC_LARGE_MINCLASS / PAGE; - npages_max = 5 * npages_min; + npages_min = HPA_LARGE_MIN / PAGE; + npages_max = HPA_LARGE_MIN / PAGE + 20; } size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); @@ -231,5 +260,6 @@ main(void) { (void)mem_tree_reverse_iter; (void)mem_tree_destroy; return test_no_reentrancy( + test_small_max_large_min, test_stress); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cda1a65..ecbcda9 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,6 +164,10 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); + TEST_MALLCTL_OPT(size_t, hpa_slab_goal, always); + TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_small_max, always); + TEST_MALLCTL_OPT(size_t, hpa_large_min, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); -- cgit v0.12 From 1964b08394e01a5b6881013c0f34ee20073cc328 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 12:01:52 -0700 Subject: HPA: Add stats for the hpa_shard. 
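The psset counters (full_slabs plus per-size-class nonfull_slabs) are
merged per arena and exposed under "stats.arenas.<i>.hpa_shard.*", along
with a new 'h' flag for malloc_stats_print() and two new arena mutex
entries. As a rough, non-normative illustration of how an application
might read one of the new totals (assuming a standard build where the
public entry point is mallctl() and stats are enabled):

	#include <stdio.h>
	#include <jemalloc/jemalloc.h>

	static void
	print_full_pageslabs(void) {
		/* Advance the epoch so the stats snapshot is refreshed. */
		uint64_t epoch = 1;
		size_t esz = sizeof(epoch);
		mallctl("epoch", &epoch, &esz, &epoch, esz);

		size_t npageslabs;
		size_t sz = sizeof(npageslabs);
		if (mallctl("stats.arenas.0.hpa_shard.full_slabs.npageslabs",
		    &npageslabs, &sz, NULL, 0) == 0) {
			printf("arena 0 full pageslabs: %zu\n", npageslabs);
		}
	}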
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/hpa.h | 7 ++ include/jemalloc/internal/mutex_prof.h | 4 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/psset.h | 7 ++ include/jemalloc/internal/stats.h | 3 +- src/arena.c | 4 +- src/ctl.c | 87 +++++++++++++++++++++++- src/hpa.c | 1 - src/pa_extra.c | 22 ++++++- src/stats.c | 106 ++++++++++++++++++++++++++++-- 12 files changed, 230 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9d4da31..c8e1e38 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats); + pac_estats_t *estats, hpa_shard_stats_t *hpastats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index fbc432b..305d365 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -45,6 +45,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; + hpa_shard_stats_t hpastats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 3decbf1..3fe9fc4 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -29,6 +29,13 @@ struct hpa_s { geom_grow_t geom_grow; }; +/* Used only by CTL; not actually stored here (i.e., all derived). 
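+ * (pa_shard_stats_merge in src/pa_extra.c fills one of these from the live
+ * psset counters while holding the shard's mtx.)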
*/ +typedef struct hpa_shard_stats_s hpa_shard_stats_t; +struct hpa_shard_stats_s { + psset_bin_stats_t psset_full_slab_stats; + psset_bin_stats_t psset_slab_stats[PSSET_NPSIZES]; +}; + typedef struct hpa_shard_s hpa_shard_t; struct hpa_shard_s { /* diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 91ab411..970f469 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -31,7 +31,9 @@ typedef enum { OP(decay_dirty) \ OP(decay_muzzy) \ OP(base) \ - OP(tcache_list) + OP(tcache_list) \ + OP(hpa_shard) \ + OP(hpa_shard_grow) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 473d682..d138f2f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -192,7 +192,7 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - size_t *resident); + hpa_shard_stats_t *hpa_stats_out, size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 72ff240..7bba3cb 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -31,6 +31,13 @@ struct psset_bin_stats_s { size_t ninactive; }; +static inline void +psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; +} + typedef struct psset_s psset_t; struct psset_s { /* diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 93bde22..727f7dc 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -11,7 +11,8 @@ OPTION('b', bins, true, false) \ OPTION('l', large, true, false) \ OPTION('x', mutex, true, false) \ - OPTION('e', extents, true, false) + OPTION('e', extents, true, false) \ + OPTION('h', hpa, config_stats, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, diff --git a/src/arena.c b/src/arena.c index 5fb5843..dc58a28 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats) { + pac_estats_t *estats, hpa_shard_stats_t *hpastats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, &astats->resident); + estats, hpastats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index 9b8ab75..b4e6517 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -216,6 +216,13 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) 
+INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -584,6 +591,41 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP }; +static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { + {NAME("npageslabs"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, + {NAME("nactive"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, + {NAME("ninactive"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} +}; + +static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME("npageslabs"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, + {NAME("nactive"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, + {NAME("ninactive"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} +}; + +static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME(""), + CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = +{ + {INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)} +}; + +static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { + {NAME("full_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("nonfull_slabs"), CHILD(indexed, + stats_arenas_i_hpa_shard_nonfull_slabs)} +}; + static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("uptime"), CTL(stats_arenas_i_uptime)}, @@ -613,7 +655,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, - {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)} + {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, + {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -844,6 +887,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * sizeof(pac_estats_t)); + memset(&ctl_arena->astats->hpastats, 0, + sizeof(hpa_shard_stats_t)); } } @@ -857,7 +902,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, - ctl_arena->astats->lstats, ctl_arena->astats->estats); + ctl_arena->astats->lstats, ctl_arena->astats->estats, + &ctl_arena->astats->hpastats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1033,6 +1079,16 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->estats[i].retained_bytes += astats->estats[i].retained_bytes; } + + /* Merge HPA stats. 
*/ + psset_bin_stats_accum(&sdstats->hpastats.psset_full_slab_stats, + &astats->hpastats.psset_full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum( + &sdstats->hpastats.psset_slab_stats[i], + &astats->hpastats.psset_slab_stats[i]); + } + } } @@ -3256,6 +3312,33 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.ninactive, size_t); + +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, + arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].nactive, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, + arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].ninactive, + size_t); + +static const ctl_named_node_t * +stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j >= PSSET_NPSIZES) { + return NULL; + } + return super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node; +} + static bool ctl_arenas_i_verify(size_t i) { size_t a = arenas_i2a_impl(i, true, true); diff --git a/src/hpa.c b/src/hpa.c index 597261d..08992bd 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -258,7 +258,6 @@ hpa_from_pai(pai_t *self) { static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { - assert((size & PAGE_MASK) == 0); hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. 
*/ diff --git a/src/pa_extra.c b/src/pa_extra.c index 402603e..db236ad 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -76,7 +76,7 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -138,6 +138,18 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, estats_out[i].muzzy_bytes = muzzy_bytes; estats_out[i].retained_bytes = retained_bytes; } + + if (shard->ever_used_hpa) { + malloc_mutex_lock(tsdn, &shard->hpa_shard.mtx); + psset_bin_stats_accum(&hpa_stats_out->psset_full_slab_stats, + &shard->hpa_shard.psset.full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum( + &hpa_stats_out->psset_slab_stats[i], + &shard->hpa_shard.psset.slab_stats[i]); + } + malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + } } static void @@ -163,4 +175,12 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, &shard->pac.decay_dirty.mtx, arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); + + if (shard->ever_used_hpa) { + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->hpa_shard.mtx, arena_prof_mutex_hpa_shard); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->hpa_shard.grow_mtx, + arena_prof_mutex_hpa_shard_grow); + } } diff --git a/src/stats.c b/src/stats.c index 7cbf204..f03e5e4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -43,6 +43,16 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) +#define CTL_M2_M5_GET(n, i, j, v, t) do { \ + size_t mib[CTL_MAX_DEPTH]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = (i); \ + mib[5] = (j); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ +} while (0) + /******************************************************************************/ /* Data. 
*/ @@ -651,6 +661,87 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + size_t npageslabs; + size_t nactive; + size_t ninactive; + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", + i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", + i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", + i, &ninactive, size_t); + + emitter_table_printf(emitter, + "HPA shard stats:\n" + " In full slabs:\n" + " npageslabs: %zu\n" + " nactive: %zu\n" + " ninactive: %zu\n", + npageslabs, nactive, ninactive); + emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_object_kv_begin(emitter, "full_slabs"); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); + emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); + emitter_json_object_end(emitter); /* End "full_slabs" */ + + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, npageslabs, NULL, right, 13, size) + COL_HDR(row, nactive, NULL, right, 13, size) + COL_HDR(row, ninactive, NULL, right, 13, size) + + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "nonfull_slabs"); + bool in_gap = false; + for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs", + i, j, &npageslabs, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive", + i, j, &nactive, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive", + i, j, &ninactive, size_t); + + bool in_gap_prev = in_gap; + in_gap = (npageslabs == 0); + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, + " ---\n"); + } + + col_size.size_val = sz_pind2sz(j); + col_ind.size_val = j; + col_npageslabs.size_val = npageslabs; + col_nactive.size_val = nactive; + col_ninactive.size_val = ninactive; + if (!in_gap) { + emitter_table_row(emitter, &row); + } + + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, + &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, + &nactive); + emitter_json_kv(emitter, "ninactive", emitter_type_size, + &ninactive); + emitter_json_object_end(emitter); + } + emitter_json_array_end(emitter); /* End "nonfull_slabs" */ + emitter_json_object_end(emitter); /* End "hpa_shard" */ +} + +static void stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; emitter_col_t col_name; @@ -677,7 +768,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, - bool mutex, bool extents) { + bool mutex, bool extents, bool hpa) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -997,6 +1088,9 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, if (extents) { stats_arena_extents_print(emitter, i); } + if (hpa) { + stats_arena_hpa_shard_print(emitter, i); + } } static void @@ -1272,7 +1366,7 @@ stats_general_print(emitter_t *emitter) { static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, - bool unmerged, bool bins, bool large, 
bool mutex, bool extents) { + bool unmerged, bool bins, bool large, bool mutex, bool extents, bool hpa) { /* * These should be deleted. We keep them around for a while, to aid in * the transition to the emitter code. @@ -1405,7 +1499,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "Merged arenas stats:\n"); emitter_json_object_kv_begin(emitter, "merged"); stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, - large, mutex, extents); + large, mutex, extents, hpa); emitter_json_object_end(emitter); /* Close "merged". */ } @@ -1416,7 +1510,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, "Destroyed arenas stats:\n"); emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, - bins, large, mutex, extents); + bins, large, mutex, extents, hpa); emitter_json_object_end(emitter); /* Close "destroyed". */ } @@ -1432,7 +1526,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, - large, mutex, extents); + large, mutex, extents, hpa); /* Close "". */ emitter_json_object_end(emitter); } @@ -1497,7 +1591,7 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { } if (config_stats) { stats_print_helper(&emitter, merged, destroyed, unmerged, - bins, large, mutex, extents); + bins, large, mutex, extents, hpa); } emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ -- cgit v0.12 From ea51e97bb893f560c70f42478d67c8159ee09b3d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 15 Oct 2020 13:46:38 -0700 Subject: Add SEC module: a small extent cache. This can be used to take pressure off a more centralized, worse-sharded allocator without requiring a full break of the arena abstraction. --- Makefile.in | 2 + include/jemalloc/internal/cache_bin.h | 1 - include/jemalloc/internal/sec.h | 118 +++++ include/jemalloc/internal/witness.h | 2 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/sec.c | 263 +++++++++++ test/unit/sec.c | 500 +++++++++++++++++++++ 10 files changed, 893 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/sec.h create mode 100644 src/sec.c create mode 100644 test/unit/sec.c diff --git a/Makefile.in b/Makefile.in index 67568f0..0136a40 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ + $(srcroot)src/sec.c \ $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ @@ -253,6 +254,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/safety_check.c \ $(srcroot)test/unit/sc.c \ + $(srcroot)test/unit/sec.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_check.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c016769..0767862 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -99,7 +99,6 @@ struct cache_bin_s { * array. 
*/ uint16_t low_bits_empty; - }; /* diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h new file mode 100644 index 0000000..7c1465e --- /dev/null +++ b/include/jemalloc/internal/sec.h @@ -0,0 +1,118 @@ +#ifndef JEMALLOC_INTERNAL_SEC_H +#define JEMALLOC_INTERNAL_SEC_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/pai.h" + +/* + * Small extent cache. + * + * This includes some utilities to cache small extents. We have a per-pszind + * bin with its own lock and edata heap (including only extents of that size). + * We don't try to do any coalescing of extents (since it would require + * cross-bin locks). As a result, we need to be careful about fragmentation. + * As a gesture in that direction, we limit the size of caches, apply first-fit + * within the bins, and, when flushing a bin, flush all of its extents rather + * than just those up to some threshold. When we allocate again, we'll get a + * chance to move to better ones. + */ + +/* + * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup + * of 4, this allows caching of sizes up to 128k. + */ +#define SEC_NPSIZES 16 +/* + * For now, we put a cap on the number of SECs an arena can have. There's no + * reason it can't be dynamic; it's just inconvenient. This number of shards + * are embedded in the arenas, so there's a space / configurability tradeoff + * here. Eventually, we should probably dynamically allocate only however many + * we require. + */ +#define SEC_NSHARDS_MAX 8 + +/* + * For now, this is just one field; eventually, we'll probably want to get more + * fine-grained data out (like per-size class statistics). + */ +typedef struct sec_stats_s sec_stats_t; +struct sec_stats_s { + /* Sum of bytes_cur across all shards. */ + size_t bytes; +}; + +static inline void +sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { + dst->bytes += src->bytes; +} + +typedef struct sec_shard_s sec_shard_t; +struct sec_shard_s { + /* + * We don't keep per-bin mutexes, even though that would allow more + * sharding; this allows global cache-eviction, which in turn allows for + * better balancing across free lists. + */ + malloc_mutex_t mtx; + /* + * A SEC may need to be shut down (i.e. flushed of its contents and + * prevented from further caching). To avoid tricky synchronization + * issues, we just track enabled-status in each shard, guarded by a + * mutex. In practice, this is only ever checked during brief races, + * since the arena-level atomic boolean tracking HPA enabled-ness means + * that we won't go down these pathways very often after custom extent + * hooks are installed. + */ + bool enabled; + edata_list_active_t freelist[SEC_NPSIZES]; + size_t bytes_cur; +}; + +typedef struct sec_s sec_t; +struct sec_s { + pai_t pai; + pai_t *fallback; + + /* + * We'll automatically refuse to cache any objects in this sec if + * they're larger than alloc_max bytes. + */ + size_t alloc_max; + /* + * Exceeding this amount of cached extents in a shard causes *all* of + * the shards in that bin to be flushed. + */ + size_t bytes_max; + + /* + * We don't necessarily always use all the shards; requests are + * distributed across shards [0, nshards - 1). 
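+	 * A shard is picked per request in sec_shard_pick(); absent the
+	 * affinity scheme mentioned there, the index comes from a 32-bit
+	 * PRNG draw reduced by multiply-shift, roughly
+	 *   idx = (uint32_t)(((uint64_t)rand32 * nshards) >> 32);
+	 * which lands in [0, nshards); without a tsd, shard 0 is used.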
+ */ + size_t nshards; + sec_shard_t shards[SEC_NSHARDS_MAX]; +}; + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max); +void sec_flush(tsdn_t *tsdn, sec_t *sec); +void sec_disable(tsdn_t *tsdn, sec_t *sec); + +/* + * Morally, these two stats methods probably ought to be a single one (and the + * mutex_prof_data ought to live in the sec_stats_t. But splitting them apart + * lets them fit easily into the pa_shard stats framework (which also has this + * split), which simplifies the stats management. + */ +void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats); +void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data); + +/* + * We use the arena lock ordering; these are acquired in phase 2 of forking, but + * should be acquired before the underlying allocator mutexes. + */ +void sec_prefork2(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_child(tsdn_t *tsdn, sec_t *sec); + +#endif /* JEMALLOC_INTERNAL_SEC_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 686bf40..662907c 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -44,6 +44,8 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 46e497a..f14f87f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f46a92f..689a520 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index dbf6f95..30c6b29 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f46a92f..689a520 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/src/sec.c b/src/sec.c new file mode 100644 index 0000000..f3c906b --- /dev/null +++ b/src/sec.c @@ -0,0 +1,263 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/sec.h" + +static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max) { + if (nshards > SEC_NSHARDS_MAX) { + nshards = SEC_NSHARDS_MAX; + } + for (size_t i = 0; i < 
nshards; i++) { + sec_shard_t *shard = &sec->shards[i]; + bool err = malloc_mutex_init(&shard->mtx, "sec_shard", + WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + shard->enabled = true; + for (pszind_t j = 0; j < SEC_NPSIZES; j++) { + edata_list_active_init(&shard->freelist[j]); + } + shard->bytes_cur = 0; + } + sec->fallback = fallback; + sec->alloc_max = alloc_max; + if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) { + sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1); + } + + sec->bytes_max = bytes_max; + sec->nshards = nshards; + + /* + * Initialize these last so that an improper use of an SEC whose + * initialization failed will segfault in an easy-to-spot way. + */ + sec->pai.alloc = &sec_alloc; + sec->pai.expand = &sec_expand; + sec->pai.shrink = &sec_shrink; + sec->pai.dalloc = &sec_dalloc; + + return false; +} + +static sec_shard_t * +sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + /* + * Eventually, we should implement affinity, tracking source shard using + * the edata_t's newly freed up fields. For now, just randomly + * distribute across all shards. + */ + if (tsdn_null(tsdn)) { + return &sec->shards[0]; + } + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * Use the trick from Daniel Lemire's "A fast alternative to the modulo + * reduction. Use a 64 bit number to store 32 bits, since we'll + * deliberately overflow when we multiply by the number of shards. + */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); + return &sec->shards[idx]; +} + +static edata_t * +sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + pszind_t pszind) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!shard->enabled) { + return NULL; + } + edata_t *edata = edata_list_active_first(&shard->freelist[pszind]); + if (edata != NULL) { + edata_list_active_remove(&shard->freelist[pszind], edata); + assert(edata_size_get(edata) <= shard->bytes_cur); + shard->bytes_cur -= edata_size_get(edata); + } + return edata; +} + +static edata_t * +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + assert((size & PAGE_MASK) == 0); + + sec_t *sec = (sec_t *)self; + + if (zero || alignment > PAGE || sec->nshards == 0 + || size > sec->alloc_max) { + return pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + pszind_t pszind = sz_psz2ind(size); + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (edata == NULL) { + /* + * See the note in dalloc, below; really, we should add a + * batch_alloc method to the PAI and get more than one extent at + * a time. 
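+		 * For now, a miss is just a single pai_alloc() against the
+		 * fallback, and the resulting extent goes straight to the
+		 * caller rather than being recorded in the shard.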
+ */ + edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + return edata; +} + +static bool +sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + sec_t *sec = (sec_t *)self; + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero); +} + +static bool +sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + sec_t *sec = (sec_t *)self; + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size); +} + +static void +sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->bytes_cur = 0; + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + for (pszind_t i = 0; i < SEC_NPSIZES; i++) { + edata_list_active_concat(&to_flush, &shard->freelist[i]); + } + /* + * A better way to do this would be to add a batch dalloc function to + * the pai_t. Practically, the current method turns into O(n) locks and + * unlocks at the fallback allocator. But some implementations (e.g. + * HPA) can straightforwardly do many deallocations in a single lock / + * unlock pair. + */ + while (!edata_list_active_empty(&to_flush)) { + edata_t *e = edata_list_active_first(&to_flush); + edata_list_active_remove(&to_flush, e); + pai_dalloc(tsdn, sec->fallback, e); + } +} + +static void +sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + assert(shard->bytes_cur <= sec->bytes_max); + size_t size = edata_size_get(edata); + pszind_t pszind = sz_psz2ind(size); + /* + * Prepending here results in FIFO allocation per bin, which seems + * reasonable. + */ + edata_list_active_prepend(&shard->freelist[pszind], edata); + shard->bytes_cur += size; + if (shard->bytes_cur > sec->bytes_max) { + /* + * We've exceeded the shard limit. We make two nods in the + * direction of fragmentation avoidance: we flush everything in + * the shard, rather than one particular bin, and we hold the + * lock while flushing (in case one of the extents we flush is + * highly preferred from a fragmentation-avoidance perspective + * in the backing allocator). This has the extra advantage of + * not requiring advanced cache balancing strategies. 
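+		 * Concretely: with a bytes_max of ten pages, caching ten
+		 * one-page extents leaves the shard exactly full, and the
+		 * eleventh dalloc flushes all eleven back to the fallback
+		 * (test_auto_flush in test/unit/sec.c exercises this path).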
+ */ + sec_do_flush_locked(tsdn, sec, shard); + } +} + +static void +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + sec_t *sec = (sec_t *)self; + if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) { + pai_dalloc(tsdn, sec->fallback, edata); + return; + } + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + if (shard->enabled) { + sec_shard_dalloc_locked(tsdn, sec, shard, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc(tsdn, sec->fallback, edata); + } +} + +void +sec_flush(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_disable(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec->shards[i].enabled = false; + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + size_t sum = 0; + for (size_t i = 0; i < sec->nshards; i++) { + /* + * We could save these lock acquisitions by making bytes_cur + * atomic, but stats collection is rare anyways and we expect + * the number and type of stats to get more interesting. + */ + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sum += sec->shards[i].bytes_cur; + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } + stats->bytes += sum; +} + +void +sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, + &sec->shards[i].mtx); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_prefork2(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + } +} diff --git a/test/unit/sec.c b/test/unit/sec.c new file mode 100644 index 0000000..cb0c17d --- /dev/null +++ b/test/unit/sec.c @@ -0,0 +1,500 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/sec.h" + +typedef struct pai_test_allocator_s pai_test_allocator_t; +struct pai_test_allocator_s { + pai_t pai; + bool alloc_fail; + size_t alloc_count; + size_t dalloc_count; + /* + * We use a simple bump allocator as the implementation. This isn't + * *really* correct, since we may allow expansion into a subsequent + * allocation, but it's not like the SEC is really examining the + * pointers it gets back; this is mostly just helpful for debugging. 
+ */ + uintptr_t next_ptr; + size_t expand_count; + bool expand_return_value; + size_t shrink_count; + bool shrink_return_value; +}; + +static inline edata_t * +pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + if (ta->alloc_fail) { + return NULL; + } + edata_t *edata = malloc(sizeof(edata_t)); + assert_ptr_not_null(edata, ""); + ta->next_ptr += alignment - 1; + edata_init(edata, /* arena_ind */ 0, + (void *)(ta->next_ptr & ~(alignment - 1)), size, + /* slab */ false, + /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, + /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); + ta->next_ptr += size; + ta->alloc_count++; + return edata; +} + +static bool +pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->expand_count++; + return ta->expand_return_value; +} + +static bool +pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->shrink_count++; + return ta->shrink_return_value; +} + +static void +pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->dalloc_count++; + free(edata); +} + +static inline void +pai_test_allocator_init(pai_test_allocator_t *ta) { + ta->alloc_fail = false; + ta->alloc_count = 0; + ta->dalloc_count = 0; + /* Just don't start the edata at 0. */ + ta->next_ptr = 10 * PAGE; + ta->expand_count = 0; + ta->expand_return_value = false; + ta->shrink_count = 0; + ta->shrink_return_value = false; + ta->pai.alloc = &pai_test_allocator_alloc; + ta->pai.expand = &pai_test_allocator_expand; + ta->pai.shrink = &pai_test_allocator_shrink; + ta->pai.dalloc = &pai_test_allocator_dalloc; +} + +TEST_BEGIN(test_reuse) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* + * We can't use the "real" tsd, since we malloc within the test + * allocator hooks; we'd get lock inversion crashes. Eventually, we + * should have a way to mock tsds, but for now just don't do any + * lock-order checking. + */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. + */ + enum { NALLOCS = 10 }; + edata_t *one_page[NALLOCS]; + edata_t *two_page[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, + /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE); + for (int i = 0; i < NALLOCS; i++) { + one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free in a different order than we allocated, to make sure free-list + * separation works correctly. 
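+	 * Because dalloc prepends to a bin's freelist and alloc pops from
+	 * the front, freeing in reverse order here means the reallocation
+	 * loop below should get the extents back in their original index
+	 * order.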
+ */ + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, one_page[i]); + } + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, two_page[i]); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Check that the n'th most recent deallocated extent is returned for + * the n'th alloc request of a given size. + */ + for (int i = 0; i < NALLOCS; i++) { + edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_eq(one_page[i], alloc1, + "Got unexpected allocation"); + expect_ptr_eq(two_page[i], alloc2, + "Got unexpected allocation"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); +} +TEST_END + + +TEST_BEGIN(test_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. + */ + enum { NALLOCS = 10 }; + edata_t *extra_alloc; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* Free until the SEC is full, but should not have flushed yet. */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free the extra allocation; this should trigger a flush of all + * extents in the cache. + */ + pai_dalloc(tsdn, &sec.pai, extra_alloc); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS + 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} +TEST_END + +/* + * A disable and a flush are *almost* equivalent; the only difference is what + * happens afterwards; disabling disallows all future caching as well. + */ +static void +do_disable_flush_test(bool is_disable) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { NALLOCS = 10 }; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + /* Free all but the last aloc. 
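+	 * (The final allocation is freed only after the flush/disable
+	 * below, to observe whether the SEC still caches it or forwards
+	 * it to the fallback.)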
*/ + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + + if (is_disable) { + sec_disable(tsdn, &sec); + } else { + sec_flush(tsdn, &sec); + } + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); + + /* + * If we free into a disabled SEC, it should forward to the fallback. + * Otherwise, the SEC should accept the allocation. + */ + pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} + +TEST_BEGIN(test_disable) { + do_disable_flush_test(/* is_disable */ true); +} +TEST_END + +TEST_BEGIN(test_flush) { + do_disable_flush_test(/* is_disable */ false); +} +TEST_END + +TEST_BEGIN(test_alloc_max_respected) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + size_t alloc_max = 2 * PAGE; + size_t attempted_alloc = 3 * PAGE; + + sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max, + /* bytes_max */ 1000 * PAGE); + + for (size_t i = 0; i < 100; i++) { + expect_zu_eq(i, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, + PAGE, /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + expect_zu_eq(i + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + pai_dalloc(tsdn, &sec.pai, edata); + } +} +TEST_END + +TEST_BEGIN(test_expand_shrink_delegate) { + /* + * Expand and shrink shouldn't affect sec state; they should just + * delegate to the fallback PAI. + */ + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + + bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, + /* zero */ false); + expect_false(err, "Unexpected expand failure"); + expect_zu_eq(1, ta.expand_count, ""); + ta.expand_return_value = true; + err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, + /* zero */ false); + expect_true(err, "Unexpected expand success"); + expect_zu_eq(2, ta.expand_count, ""); + + err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE); + expect_false(err, "Unexpected shrink failure"); + expect_zu_eq(1, ta.shrink_count, ""); + ta.shrink_return_value = true; + err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE); + expect_true(err, "Unexpected shrink success"); + expect_zu_eq(2, ta.shrink_count, ""); +} +TEST_END + +TEST_BEGIN(test_nshards_0) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + pai_dalloc(tsdn, &sec.pai, edata); + + /* Both operations should have gone directly to the fallback. */ + expect_zu_eq(1, ta.alloc_count, ""); + expect_zu_eq(1, ta.dalloc_count, ""); +} +TEST_END + +static void +expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { + sec_stats_t stats; + /* + * Check that the stats merging accumulates rather than overwrites by + * putting some (made up) data there to begin with. + */ + stats.bytes = 123; + sec_stats_merge(tsdn, sec, &stats); + assert_zu_eq(npages * PAGE + 123, stats.bytes, ""); +} + +TEST_BEGIN(test_stats_simple) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + NITERS = 100, + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Increase and decrease, without flushing. */ + for (size_t i = 0; i < NITERS; i++) { + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + pai_dalloc(tsdn, &sec.pai, allocs[j]); + expect_stats_pages(tsdn, &sec, j + 1); + } + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); + } + } +} +TEST_END + +TEST_BEGIN(test_stats_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *extra_alloc0; + edata_t *extra_alloc1; + edata_t *allocs[2 * FLUSH_PAGES]; + + extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + + for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + pai_dalloc(tsdn, &sec.pai, extra_alloc0); + /* The last dalloc should have triggered a flush. */ + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages; stats should still work. */ + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + pai_dalloc(tsdn, &sec.pai, extra_alloc1); + /* The last dalloc should have triggered a flush, again. */ + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +TEST_BEGIN(test_stats_manual_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. 
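+	 * With nshards == 0, sec_alloc() and sec_dalloc() both
+	 * short-circuit to the fallback PAI, which is what the fallback's
+	 * alloc/dalloc counters below confirm.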
*/ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Dalloc the first half of the allocations. */ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + sec_flush(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages. */ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + sec_disable(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +int +main(void) { + return test( + test_reuse, + test_auto_flush, + test_disable, + test_flush, + test_alloc_max_respected, + test_expand_shrink_delegate, + test_nshards_0, + test_stats_simple, + test_stats_auto_flush, + test_stats_manual_flush); +} -- cgit v0.12 From 6599651aee2b1b1ab0c52fdb03f23394bd683c47 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 16 Oct 2020 13:14:59 -0700 Subject: PA: Use an SEC in fron of the HPA shard. --- include/jemalloc/internal/arena_externs.h | 3 ++- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/hpa.h | 6 ++--- .../jemalloc/internal/jemalloc_internal_externs.h | 5 ++++ include/jemalloc/internal/mutex_prof.h | 3 ++- include/jemalloc/internal/pa.h | 20 +++++++++++---- src/arena.c | 28 +++++++++++++++----- src/ctl.c | 19 +++++++++++++- src/hpa.c | 6 ++--- src/jemalloc.c | 30 +++++++++++++++++----- src/pa.c | 19 ++++++++++---- src/pa_extra.c | 23 +++++++++++++---- src/stats.c | 8 ++++++ src/tcache.c | 6 +++-- test/unit/mallctl.c | 3 +++ 15 files changed, 140 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c8e1e38..40223b5 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats); + pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); @@ -99,6 +99,7 @@ void arena_prefork4(tsdn_t *tsdn, arena_t *arena); void arena_prefork5(tsdn_t *tsdn, arena_t *arena); void arena_prefork6(tsdn_t *tsdn, arena_t *arena); void arena_prefork7(tsdn_t *tsdn, arena_t *arena); +void arena_prefork8(tsdn_t *tsdn, arena_t *arena); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 305d365..a6ae05c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -46,6 +46,7 @@ typedef struct ctl_arena_stats_s { arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; hpa_shard_stats_t hpastats; + sec_stats_t secstats; } ctl_arena_stats_t; typedef struct 
ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 3fe9fc4..24c6856 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -90,10 +90,10 @@ void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* * We share the fork ordering with the PA and arena prefork handling; that's why - * these are 2 and 3 rather than 0 or 1. + * these are 3 and 4 rather than 0 and 1. */ -void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); @@ -103,7 +103,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); * so it needs to be lower in the witness ordering, but it's also logically * global and not tied to any particular arena. */ -void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa); +void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa); void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8faadaa..814a7a1 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -17,6 +17,11 @@ extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; extern size_t opt_hpa_small_max; extern size_t opt_hpa_large_min; + +extern size_t opt_hpa_sec_max_alloc; +extern size_t opt_hpa_sec_max_bytes; +extern size_t opt_hpa_sec_nshards; + extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 970f469..ef0bf0d 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -33,7 +33,8 @@ typedef enum { OP(base) \ OP(tcache_list) \ OP(hpa_shard) \ - OP(hpa_shard_grow) + OP(hpa_shard_grow) \ + OP(hpa_sec) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d138f2f..5e97d0b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/sec.h" /* * The page allocator; responsible for acquiring pages of memory for @@ -85,7 +86,12 @@ struct pa_shard_s { /* Allocates from a PAC. */ pac_t pac; - /* Allocates from a HPA. */ + /* + * We place a small extent cache in front of the HPA, since we intend + * these configurations to use many fewer arenas, and therefore have a + * higher risk of hot locks. + */ + sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. */ @@ -124,18 +130,20 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min); + size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, + size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. 
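 * Disabling also flushes and disables the SEC sitting in front of the HPA
 * shard (which is why this now takes a tsdn); see pa_shard_disable_hpa() in
 * src/pa.c.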
*/ -void pa_shard_disable_hpa(pa_shard_t *shard); +void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard); /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). */ -void pa_shard_reset(pa_shard_t *shard); +void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard); + /* * Destroy all the remaining retained extents. Should only be called after * decaying all active, dirty, and muzzy extents to the retained state, as the @@ -184,6 +192,7 @@ void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); @@ -192,7 +201,8 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, size_t *resident); + hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, + size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/src/arena.c b/src/arena.c index dc58a28..360827e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats) { + pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, hpastats, &astats->resident); + estats, hpastats, secstats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -483,6 +483,14 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (all) { + /* + * We should take a purge of "all" to mean "save as much memory + * as possible", including flushing any caches (for situations + * like thread death, or manual purge calls). + */ + sec_flush(tsdn, &arena->pa_shard.hpa_sec); + } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; } @@ -631,7 +639,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { &arena->bins[i].bin_shards[j]); } } - pa_shard_reset(&arena->pa_shard); + pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } void @@ -1362,7 +1370,7 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); } /* No using the HPA now that we have the custom hooks. 
*/ - pa_shard_disable_hpa(&arena->pa_shard); + pa_shard_disable_hpa(tsd_tsdn(tsd), &arena->pa_shard); extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); if (have_background_thread) { malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -1529,7 +1537,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min)) { + opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { goto label_error; } } @@ -1658,16 +1667,21 @@ arena_prefork4(tsdn_t *tsdn, arena_t *arena) { void arena_prefork5(tsdn_t *tsdn, arena_t *arena) { - base_prefork(tsdn, arena->base); + pa_shard_prefork5(tsdn, &arena->pa_shard); } void arena_prefork6(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->large_mtx); + base_prefork(tsdn, arena->base); } void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_prefork(tsdn, &arena->large_mtx); +} + +void +arena_prefork8(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); diff --git a/src/ctl.c b/src/ctl.c index b4e6517..874aaac 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,9 @@ CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_small_max) CTL_PROTO(opt_hpa_large_min) +CTL_PROTO(opt_hpa_sec_max_alloc) +CTL_PROTO(opt_hpa_sec_max_bytes) +CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -246,6 +249,7 @@ CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) +CTL_PROTO(stats_arenas_i_hpa_sec_bytes) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -360,6 +364,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, + {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, + {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -650,6 +657,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, + {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -889,6 +897,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { sizeof(pac_estats_t)); memset(&ctl_arena->astats->hpastats, 0, sizeof(hpa_shard_stats_t)); + memset(&ctl_arena->astats->secstats, 0, + sizeof(sec_stats_t)); } } @@ -903,7 +913,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats, ctl_arena->astats->estats, - &ctl_arena->astats->hpastats); + &ctl_arena->astats->hpastats, 
&ctl_arena->astats->secstats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1089,6 +1099,7 @@ MUTEX_PROF_ARENA_MUTEXES &astats->hpastats.psset_slab_stats[i]); } + sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -1895,6 +1906,9 @@ CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) @@ -3114,6 +3128,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, + arenas_i(mib[2])->astats->secstats.bytes, size_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, diff --git a/src/hpa.c b/src/hpa.c index 08992bd..f49aa2b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -411,12 +411,12 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { +hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->grow_mtx); } void -hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { +hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->mtx); } @@ -433,7 +433,7 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) { +hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) { malloc_mutex_prefork(tsdn, &hpa->grow_mtx); malloc_mutex_prefork(tsdn, &hpa->mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8ce9ca1..09b168c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -141,6 +141,11 @@ size_t opt_hpa_slab_max_alloc = 256 * 1024; size_t opt_hpa_small_max = 32 * 1024; size_t opt_hpa_large_min = 4 * 1024 * 1024; +size_t opt_hpa_sec_max_alloc = 32 * 1024; +/* These settings correspond to a maximum of 1MB cached per arena. */ +size_t opt_hpa_sec_max_bytes = 256 * 1024; +size_t opt_hpa_sec_nshards = 4; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -1494,11 +1499,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], true) CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", + 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { @@ -1808,7 +1820,8 @@ malloc_init_hard_a0_locked() { } if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min)) { + opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { return true; } } @@ -4226,7 +4239,7 @@ _malloc_prefork(void) background_thread_prefork1(tsd_tsdn(tsd)); } /* Break arena prefork into stages to preserve lock order. */ - for (i = 0; i < 8; i++) { + for (i = 0; i < 9; i++) { for (j = 0; j < narenas; j++) { if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != NULL) { @@ -4255,12 +4268,15 @@ _malloc_prefork(void) case 7: arena_prefork7(tsd_tsdn(tsd), arena); break; + case 8: + arena_prefork8(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } } - if (i == 3 && opt_hpa) { - hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global); + if (i == 4 && opt_hpa) { + hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global); } } diff --git a/src/pa.c b/src/pa.c index 8e1ec84..825b10a 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,7 +49,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min) { + size_t ps_alloc_max, size_t small_max, size_t large_min, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { ps_goal &= ~PAGE_MASK; ps_alloc_max &= ~PAGE_MASK; @@ -60,6 +61,10 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { return true; } + if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, + sec_alloc_max, sec_bytes_max)) { + return true; + } shard->ever_used_hpa = true; atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED); @@ -67,24 +72,27 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, } void -pa_shard_disable_hpa(pa_shard_t *shard) { +pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); + sec_disable(tsdn, &shard->hpa_sec); } void -pa_shard_reset(pa_shard_t *shard) { +pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); + sec_flush(tsdn, &shard->hpa_sec); } void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { + sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); } static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { return (edata_pai_get(edata) 
== EXTENT_PAI_PAC - ? &shard->pac.pai : &shard->hpa_shard.pai); + ? &shard->pac.pai : &shard->hpa_sec.pai); } edata_t * @@ -95,7 +103,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { - edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, + edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero); } /* @@ -173,6 +181,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } + edata_addr_set(edata, edata_base_get(edata)); edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); pai_t *pai = pa_get_pai(shard, edata); diff --git a/src/pa_extra.c b/src/pa_extra.c index db236ad..24cb653 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,24 +16,31 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); if (shard->ever_used_hpa) { - hpa_shard_prefork2(tsdn, &shard->hpa_shard); + sec_prefork2(tsdn, &shard->hpa_sec); } } void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); + if (shard->ever_used_hpa) { + hpa_shard_prefork3(tsdn, &shard->hpa_shard); + } +} + +void +pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { ecache_prefork(tsdn, &shard->pac.ecache_dirty); ecache_prefork(tsdn, &shard->pac.ecache_muzzy); ecache_prefork(tsdn, &shard->pac.ecache_retained); if (shard->ever_used_hpa) { - hpa_shard_prefork3(tsdn, &shard->hpa_shard); + hpa_shard_prefork4(tsdn, &shard->hpa_shard); } } void -pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { +pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_prefork(tsdn, &shard->edata_cache); } @@ -47,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { + sec_postfork_parent(tsdn, &shard->hpa_sec); hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); } } @@ -61,6 +69,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { + sec_postfork_child(tsdn, &shard->hpa_sec); hpa_shard_postfork_child(tsdn, &shard->hpa_shard); } } @@ -76,7 +85,8 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, + size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -149,6 +159,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, &shard->hpa_shard.psset.slab_stats[i]); } malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } @@ -182,5 +193,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa_shard.grow_mtx, arena_prof_mutex_hpa_shard_grow); + sec_mutex_stats_read(tsdn, &shard->hpa_sec, + &mutex_prof_data[arena_prof_mutex_hpa_sec]); } } diff --git a/src/stats.c b/src/stats.c index 
f03e5e4..4b40721 100644 --- a/src/stats.c +++ b/src/stats.c @@ -678,6 +678,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", i, &ninactive, size_t); + size_t sec_bytes; + CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); + emitter_table_printf(emitter, "HPA shard stats:\n" " In full slabs:\n" @@ -1194,6 +1199,9 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_small_max") OPT_WRITE_SIZE_T("hpa_large_min") + OPT_WRITE_SIZE_T("hpa_sec_max_alloc") + OPT_WRITE_SIZE_T("hpa_sec_max_bytes") + OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/src/tcache.c b/src/tcache.c index 6bf1d30..edbedf7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -716,9 +716,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (arena_nthreads_get(arena, false) == 0 && !background_thread_enabled()) { /* Force purging when no threads assigned to the arena anymore. */ - arena_decay(tsd_tsdn(tsd), arena, false, true); + arena_decay(tsd_tsdn(tsd), arena, + /* is_background_thread */ false, /* all */ true); } else { - arena_decay(tsd_tsdn(tsd), arena, false, false); + arena_decay(tsd_tsdn(tsd), arena, + /* is_background_thread */ false, /* all */ false); } } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ecbcda9..278bd09 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -168,6 +168,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_small_max, always); TEST_MALLCTL_OPT(size_t, hpa_large_min, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); -- cgit v0.12 From 634ec6f50abd57e6371e0c745ab699f2cf6d08e6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 18 Sep 2020 15:50:27 -0700 Subject: Edata: add an "age" field. --- include/jemalloc/internal/edata.h | 44 +++++++++++++++++++++++++++++++-------- src/edata.c | 1 + 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index f175af9..632c6c3 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -71,6 +71,7 @@ struct edata_map_info_s { typedef struct edata_s edata_t; typedef ph(edata_t) edata_tree_t; typedef ph(edata_t) edata_heap_t; +typedef ph(edata_t) edata_age_heap_t; struct edata_s { /* * Bitfield containing several fields: @@ -193,16 +194,11 @@ struct edata_s { }; /* - * Reserved for hugepages -- once that allocator is more settled, we - * might be able to claw some of this back. Until then, don't get any - * funny ideas about using the space we just freed up to keep some other - * bit of metadata around. That kind of thinking can be hazardous to - * your health. - * - * This keeps the size of an edata_t at exactly 128 bytes on - * architectures with 8-byte pointers and 4k pages. + * In some context-specific sense, the age of an active extent. 
Each + * context can pick a specific meaning, and share the definition of the + * edata_age_heap_t below. */ - void *reserved1; + uint64_t age; union { /* * We could steal a low bit from these fields to indicate what @@ -374,6 +370,11 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } +static inline uint64_t +edata_age_get(const edata_t *edata) { + return edata->age; +} + static inline edata_t * edata_ps_get(const edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); @@ -469,6 +470,11 @@ edata_bsize_set(edata_t *edata, size_t bsize) { } static inline void +edata_age_set(edata_t *edata, uint64_t age) { + edata->age = age; +} + +static inline void edata_ps_set(edata_t *edata, edata_t *ps) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); edata->ps = ps; @@ -615,6 +621,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if (config_prof) { edata_prof_tctx_set(edata, NULL); } + edata_age_set(edata, 0); edata_ps_set(edata, NULL); edata_longest_free_range_set(edata, 0); } @@ -630,6 +637,7 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); + edata_age_set(edata, 0); /* * This isn't strictly true, but base allocated extents never get * deallocated and can't be looked up in the emap, but no sense in @@ -698,7 +706,25 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } +static inline int +edata_age_comp(const edata_t *a, const edata_t *b) { + uint64_t a_age = edata_age_get(a); + uint64_t b_age = edata_age_get(b); + + /* + * Equal ages are possible in certain race conditions, like two distinct + * threads simultaneously allocating a new fresh slab without holding a + * bin lock. + */ + int ret = (a_age > b_age) - (a_age < b_age); + if (ret != 0) { + return ret; + } + return edata_snad_comp(a, b); +} + ph_proto(, edata_avail_, edata_tree_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) +ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/src/edata.c b/src/edata.c index 5e53e99..214e993 100644 --- a/src/edata.c +++ b/src/edata.c @@ -4,3 +4,4 @@ ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) +ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) -- cgit v0.12 From d16849c91da35c37359331195c6213421a17976a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 18 Sep 2020 16:36:40 -0700 Subject: psset: Do first-fit based on slab age. This functions more like the serial number strategy of the ecache and hpa_central_t. Longer-lived slabs are more likely to continue to live for longer in the future. --- include/jemalloc/internal/psset.h | 5 ++- src/psset.c | 47 ++++++++++------------- test/unit/psset.c | 80 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7bba3cb..1431123 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -44,7 +44,7 @@ struct psset_s { * The pageslabs, quantized by the size class of the largest contiguous * free run of pages in a pageslab. */ - edata_heap_t pageslabs[PSSET_NPSIZES]; + edata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; /* * Full slabs don't live in any edata heap. 
But we still track their @@ -52,6 +52,9 @@ struct psset_s { */ psset_bin_stats_t full_slab_stats; psset_bin_stats_t slab_stats[PSSET_NPSIZES]; + + /* How many alloc_new calls have happened? */ + uint64_t age_counter; }; void psset_init(psset_t *psset); diff --git a/src/psset.c b/src/psset.c index 04d3548..9fc7ec1 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,7 +11,7 @@ static const bitmap_info_t psset_bitmap_info = void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - edata_heap_new(&psset->pageslabs[i]); + edata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); psset->full_slab_stats.npageslabs = 0; @@ -22,6 +22,7 @@ psset_init(psset_t *psset) { psset->slab_stats[i].nactive = 0; psset->slab_stats[i].ninactive = 0; } + psset->age_counter = 0; } /* @@ -48,13 +49,13 @@ psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_heap_remove(&psset->pageslabs[pind], ps); + edata_age_heap_remove(&psset->pageslabs[pind], ps); psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_heap_insert(&psset->pageslabs[pind], ps); + edata_age_heap_insert(&psset->pageslabs[pind], ps); psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); } @@ -70,32 +71,24 @@ psset_assert_ps_consistent(edata_t *ps) { */ static edata_t * psset_recycle_extract(psset_t *psset, size_t size) { - pszind_t ret_ind; - edata_t *ret = NULL; - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - for (pszind_t i = (pszind_t)bitmap_ffu(psset->bitmap, - &psset_bitmap_info, (size_t)pind); - i < PSSET_NPSIZES; - i = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, - (size_t)i + 1)) { - assert(!edata_heap_empty(&psset->pageslabs[i])); - edata_t *ps = edata_heap_first(&psset->pageslabs[i]); - if (ret == NULL || edata_snad_comp(ps, ret) < 0) { - ret = ps; - ret_ind = i; - } + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + (size_t)min_pind); + if (pind == PSSET_NPSIZES) { + return NULL; } - if (ret == NULL) { + edata_t *ps = edata_age_heap_first(&psset->pageslabs[pind]); + if (ps == NULL) { return NULL; } - psset_edata_heap_remove(psset, ret_ind, ret); - if (edata_heap_empty(&psset->pageslabs[ret_ind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); + psset_edata_heap_remove(psset, pind, ps); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } - psset_assert_ps_consistent(ret); - return ret; + psset_assert_ps_consistent(ps); + return ps; } static void @@ -107,7 +100,7 @@ psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { assert(pind < PSSET_NPSIZES); - if (edata_heap_empty(&psset->pageslabs[pind])) { + if (edata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } psset_edata_heap_insert(psset, pind, ps); @@ -215,6 +208,8 @@ psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { assert(fb_empty(ps_fb, ps_npages)); assert(ps_npages >= (size >> LG_PAGE)); edata_nfree_set(ps, (uint32_t)ps_npages); + edata_age_set(ps, psset->age_counter); + psset->age_counter++; psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -287,7 +282,7 @@ psset_dalloc(psset_t *psset, edata_t 
*edata) { */ if (ps_old_longest_free_range > 0) { psset_edata_heap_remove(psset, old_pind, ps); - if (edata_heap_empty(&psset->pageslabs[old_pind])) { + if (edata_age_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } @@ -299,7 +294,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { /* Otherwise, it gets reinserted. */ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( new_range_len << LG_PAGE)); - if (edata_heap_empty(&psset->pageslabs[new_pind])) { + if (edata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } diff --git a/test/unit/psset.c b/test/unit/psset.c index 0bc4460..861903d 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -266,8 +266,7 @@ TEST_BEGIN(test_multi_pageslab) { /* * Free up a 2-page hole in the earlier slab, and a 1-page one in the - * later one. We should still pick the earlier slab for a 1-page - * allocation. + * later one. We should still pick the later one. */ ps = psset_dalloc(&psset, &alloc[0][0]); expect_ptr_null(ps, "Unexpected eviction"); @@ -276,8 +275,8 @@ TEST_BEGIN(test_multi_pageslab) { ps = psset_dalloc(&psset, &alloc[1][0]); expect_ptr_null(ps, "Unexpected eviction"); err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); - expect_ptr_eq(&pageslab[0], edata_ps_get(&alloc[0][0]), - "Should have picked first pageslab"); + expect_ptr_eq(&pageslab[1], edata_ps_get(&alloc[0][0]), + "Should have picked the fuller pageslab"); /* * Now both slabs have 1-page holes. Free up a second one in the later @@ -370,6 +369,76 @@ TEST_BEGIN(test_stats) { } TEST_END +TEST_BEGIN(test_oldest_fit) { + bool err; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_init(&pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), + PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, + extent_state_active, /* zeroed */ false, /* comitted */ true, + EXTENT_PAI_HPA, EXTENT_IS_HEAD); + + /* + * This pageslab is better from an edata_comp_snad POV, but will be + * added to the set after the previous one, and so should be less + * preferred for allocations. + */ + edata_t worse_pageslab; + memset(&worse_pageslab, 0, sizeof(pageslab)); + edata_init(&worse_pageslab, /* arena_ind */ 0, + (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, + SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, + /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[i]), + "Allocated from the wrong pageslab"); + } + + edata_init_test(&worse_alloc[0]); + psset_alloc_new(&psset, &worse_pageslab, &worse_alloc[0], PAGE); + expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[0]), + "Allocated from the wrong pageslab"); + /* + * Make the two pssets otherwise indistinguishable; all full except for + * a single page. 
+ */ + for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { + edata_init_test(&worse_alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + expect_ptr_eq(&worse_pageslab, edata_ps_get(&alloc[i]), + "Allocated from the wrong pageslab"); + } + + /* Deallocate the last page from the older pageslab. */ + edata_t *evicted = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(evicted, "Unexpected eviction"); + + /* + * This edata is the whole purpose for the test; it should come from the + * older pageslab. + */ + edata_t test_edata; + edata_init_test(&test_edata); + err = psset_alloc_reuse(&psset, &test_edata, PAGE); + expect_false(err, "Nonempty psset failed page allocation"); + expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), + "Allocated from the wrong pageslab"); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -378,5 +447,6 @@ main(void) { test_reuse, test_evict, test_multi_pageslab, - test_stats); + test_stats, + test_oldest_fit); } -- cgit v0.12 From ea32060f9ca5e14077cda7fa2401a1f91f55ad82 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 22 Oct 2020 14:13:09 -0700 Subject: SEC: Implement thread affinity. For now, just have every thread pick a shard once and stick with it. --- include/jemalloc/internal/tsd.h | 2 ++ src/sec.c | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9408b2c..5ac85e1 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -77,6 +77,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(sec_shard, uint8_t, uint8_t) \ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ @@ -106,6 +107,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ + /* sec_shard */ (uint8_t)-1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ diff --git a/src/sec.c b/src/sec.c index f3c906b..262d813 100644 --- a/src/sec.c +++ b/src/sec.c @@ -61,14 +61,20 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { return &sec->shards[0]; } tsd_t *tsd = tsdn_tsd(tsdn); - /* - * Use the trick from Daniel Lemire's "A fast alternative to the modulo - * reduction. Use a 64 bit number to store 32 bits, since we'll - * deliberately overflow when we multiply by the number of shards. - */ - uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); - return &sec->shards[idx]; + uint8_t *idxp = tsd_sec_shardp_get(tsd); + if (*idxp == (uint8_t)-1) { + /* + * First use; initialize using the trick from Daniel Lemire's + * "A fast alternative to the modulo reduction. Use a 64 bit + * number to store 32 bits, since we'll deliberately overflow + * when we multiply by the number of shards. 
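A minimal standalone sketch of the multiply-shift reduction described in the comment above (illustrative only; bounded_index is not a jemalloc function, and the surrounding diff shows the real use):

#include <stdint.h>

/*
 * Map a uniformly random 32-bit value r into [0, nshards) without a modulo.
 * The 64-bit product r * nshards is strictly less than 2^32 * nshards, so
 * its high 32 bits form an index in [0, nshards).
 */
static inline uint32_t
bounded_index(uint32_t r, uint32_t nshards) {
	return (uint32_t)(((uint64_t)r * (uint64_t)nshards) >> 32);
}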
+ */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); + assert(idx < (uint32_t)sec->nshards); + *idxp = (uint8_t)idx; + } + return &sec->shards[*idxp]; } static edata_t * -- cgit v0.12 From bf72188f80c59328b20441c79861f9373c22bccd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 21 Oct 2020 19:47:57 -0700 Subject: Allow opt.tcache_max to accept small size classes. Previously all the small size classes were cached. However this has downsides -- particularly when page size is greater than 4K (e.g. iOS), which will result in much higher SMALL_MAXCLASS. This change allows tcache_max to be set to lower values, to better control resources taken by tcache. --- Makefile.in | 1 + doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/cache_bin.h | 11 ++ include/jemalloc/internal/tcache_inlines.h | 31 +++++- src/cache_bin.c | 19 ++-- src/tcache.c | 52 +++++++-- test/unit/arena_decay.c | 7 +- test/unit/arena_decay.sh | 2 +- test/unit/tcache_max.c | 170 +++++++++++++++++++++++++++++ test/unit/tcache_max.sh | 3 + 10 files changed, 265 insertions(+), 33 deletions(-) create mode 100644 test/unit/tcache_max.c create mode 100644 test/unit/tcache_max.sh diff --git a/Makefile.in b/Makefile.in index 0136a40..34df239 100644 --- a/Makefile.in +++ b/Makefile.in @@ -264,6 +264,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e5f2aa6..e24c191 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]> r- Maximum size class to cache in the thread-specific cache - (tcache). At a minimum, all small size classes are cached; and at a + (tcache). At a minimum, the first size class is cached; and at a maximum, size classes up to 8 MiB can be cached. The default maximum is 32 KiB (2^15). As a convenience, this may also be set by specifying lg_tcache_max, which will be taken to be the base-2 logarithm of the diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0767862..64275f2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -21,6 +21,17 @@ typedef uint16_t cache_bin_sz_t; /* + * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a + * bug starts leaking those. Make it look like the junk pattern but be distinct + * from it. + */ +static const uintptr_t cache_bin_preceding_junk = + (uintptr_t)0x7a7a7a7a7a7a7a7aULL; +/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ +static const uintptr_t cache_bin_trailing_junk = + (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; + +/* * That implies the following value, for the maximum number of items in any * individual bin. The cache bins track their bounds looking just at the low * bits of a pointer, compared against a cache_bin_sz_t. 
So that's diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 1cba918..926c852 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } +JEMALLOC_ALWAYS_INLINE bool +tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { + assert(ind < SC_NBINS); + bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0); + if (ret && bin != NULL) { + /* small size class but cache bin disabled. */ + assert(ind >= nhbins); + assert((uintptr_t)(*bin->stack_head) == + cache_bin_preceding_junk); + } + + return ret; +} + JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } + if (unlikely(tcache_small_bin_disabled(binind, bin))) { + /* stats and zero are handled directly by the arena. */ + return arena_malloc_hard(tsd_tsdn(tsd), arena, size, + binind, zero); + } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, binind, &tcache_hard_success); @@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - assert(tcache_salloc(tsd_tsdn(tsd), ptr) - <= SC_SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div; + if (unlikely(tcache_small_bin_disabled(binind, bin))) { + arena_dalloc_small(tsd_tsdn(tsd), ptr); + return; + } + cache_bin_sz_t max = cache_bin_info_ncached_max( + &tcache_bin_info[binind]); + unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/cache_bin.c b/src/cache_bin.c index 1e26c4e..1d04b0d 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { + assert(infos[i].ncached_max > 0); *size += infos[i].ncached_max * sizeof(void *); } @@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - /* - * Leave a noticeable mark pattern on the boundaries, in case a bug - * starts leaking those. Make it look like the junk pattern but be - * distinct from it. - */ - uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL; - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk; + + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + cache_bin_preceding_junk; *cur_offset += sizeof(void *); } void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { - /* Note: a7 vs. 7a above -- this tells you which pointer leaked. 
*/ - uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + cache_bin_trailing_junk; *cur_offset += sizeof(void *); } - void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset) { @@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); + + assert(bin_stack_size > 0 || empty_position == full_position); } bool diff --git a/src/tcache.c b/src/tcache.c index edbedf7..41a1b82 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info; static size_t tcache_bin_alloc_size; static size_t tcache_bin_alloc_alignment; +/* Number of cache bins enabled, including both large and small. */ unsigned nhbins; +/* Max size class to be cached (can be small or large). */ size_t tcache_maxclass; tcaches_t *tcaches; @@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; - memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); + /* + * We reserve cache bins for all small size classes, even if some may + * not get used (i.e. bins higher than nhbins). This allows the fast + * and common paths to access cache bin metadata safely w/o worrying + * about which ones are disabled. + */ + unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; + memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); size_t cur_offset = 0; cache_bin_preincrement(tcache_bin_info, nhbins, mem, @@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; + tcache_slow->bin_flush_delay_items[i] + = tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; cache_bin_init(cache_bin, &tcache_bin_info[i], mem, &cur_offset); } + /* + * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS), + * their cache bins are initialized to a state to safely and efficiently + * fail all fastpath alloc / free, so that no additional check around + * nhbins is needed on fastpath. + */ + for (unsigned i = nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_t *cache_bin = &tcache->bins[i]; + void *fake_stack = mem; + size_t fake_offset = 0; + + cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack, + &fake_offset); + assert(tcache_small_bin_disabled(i, cache_bin)); + } + cache_bin_postincrement(tcache_bin_info, nhbins, mem, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); - for (unsigned i = 0; i < SC_NBINS; i++) { - tcache_slow->bin_flush_delay_items[i] - = tcache_gc_item_delay_compute(i); - } } /* Initialize auto tcache (embedded in TSD). */ @@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { tcache_maxclass = sz_s2u(opt_tcache_max); - if (tcache_maxclass < SC_SMALL_MAXCLASS) { - tcache_maxclass = SC_SMALL_MAXCLASS; - } assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); nhbins = sz_size2index(tcache_maxclass) + 1; @@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { return true; } - /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, - nhbins * sizeof(cache_bin_info_t), CACHELINE); + /* Initialize tcache_bin_info. See comments in tcache_init(). */ + unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; + size_t size = n_reserved_bins * sizeof(cache_bin_info_t); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size, + CACHELINE); if (tcache_bin_info == NULL) { return true; } + for (szind_t i = 0; i < nhbins; i++) { unsigned ncached_max = tcache_ncached_max_compute(i); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } + for (szind_t i = nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_info_init(&tcache_bin_info[i], 0); + assert(tcache_small_bin_disabled(i, NULL)); + } + cache_bin_info_compute_alloc(tcache_bin_info, nhbins, &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 555f71a..a266168 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) { unsigned arena_ind = do_arena_create(ddt, mdt); int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - size_t large; /* * Allocate a bunch of large objects, pause the clock, deallocate every @@ -440,12 +439,10 @@ TEST_BEGIN(test_decay_ticker) { * [md]allocx() in a tight loop while advancing time rapidly to verify * the ticker triggers purging. */ - - size_t tcache_max; + size_t large; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); do_purge(arena_ind); uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); diff --git a/test/unit/arena_decay.sh b/test/unit/arena_decay.sh index 45aeccf..52f1b20 100644 --- a/test/unit/arena_decay.sh +++ b/test/unit/arena_decay.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0" +export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024" diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c new file mode 100644 index 0000000..0594cef --- /dev/null +++ b/test/unit/tcache_max.c @@ -0,0 +1,170 @@ +#include "test/jemalloc_test.h" + +enum { + alloc_option_start = 0, + use_malloc = 0, + use_mallocx, + alloc_option_end +}; + +enum { + dalloc_option_start = 0, + use_free = 0, + use_dallocx, + use_sdallocx, + dalloc_option_end +}; + +static unsigned alloc_option, dalloc_option; +static size_t tcache_max; + +static void * +alloc_func(size_t sz) { + void *ret; + + switch (alloc_option) { + case use_malloc: + ret = malloc(sz); + break; + case use_mallocx: + ret = mallocx(sz, 0); + break; + default: + unreachable(); + } + expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure"); + + return ret; +} + +static void +dalloc_func(void *ptr, size_t sz) { + switch (dalloc_option) { + case use_free: + free(ptr); + break; + case use_dallocx: + dallocx(ptr, 0); + break; + case use_sdallocx: + sdallocx(ptr, sz, 0); + break; + default: + unreachable(); + } +} + +static size_t +tcache_bytes_read(void) { + uint64_t epoch; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + size_t tcache_bytes; + size_t sz = sizeof(tcache_bytes); + assert_d_eq(mallctl( + "stats.arenas." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + + return tcache_bytes; +} + +static void +tcache_bytes_check_update(size_t *prev, ssize_t diff) { + size_t tcache_bytes = tcache_bytes_read(); + expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); + + *prev += diff; +} + +static void +test_tcache_bytes_alloc(size_t alloc_size) { + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); + + size_t usize = sz_s2u(alloc_size); + /* No change is expected if usize is outside of tcache_max range. */ + bool cached = (usize <= tcache_max); + ssize_t diff = cached ? usize : 0; + + void *ptr1 = alloc_func(alloc_size); + void *ptr2 = alloc_func(alloc_size); + + size_t bytes = tcache_bytes_read(); + dalloc_func(ptr2, alloc_size); + /* Expect tcache_bytes increase after dalloc */ + tcache_bytes_check_update(&bytes, diff); + + dalloc_func(ptr1, alloc_size); + /* Expect tcache_bytes increase again */ + tcache_bytes_check_update(&bytes, diff); + + void *ptr3 = alloc_func(alloc_size); + if (cached) { + expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr"); + } + /* Expect tcache_bytes decrease after alloc */ + tcache_bytes_check_update(&bytes, -diff); + + void *ptr4 = alloc_func(alloc_size); + if (cached) { + expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr"); + } + /* Expect tcache_bytes decrease again */ + tcache_bytes_check_update(&bytes, -diff); + + dalloc_func(ptr3, alloc_size); + tcache_bytes_check_update(&bytes, diff); + dalloc_func(ptr4, alloc_size); + tcache_bytes_check_update(&bytes, diff); +} + +static void +test_tcache_max_impl(void) { + size_t sz; + sz = sizeof(tcache_max); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + + /* opt.tcache_max set to 1024 in tcache_max.sh */ + expect_zu_eq(tcache_max, 1024, "tcache_max not expected"); + + test_tcache_bytes_alloc(1); + test_tcache_bytes_alloc(tcache_max - 1); + test_tcache_bytes_alloc(tcache_max); + test_tcache_bytes_alloc(tcache_max + 1); + + test_tcache_bytes_alloc(PAGE - 1); + test_tcache_bytes_alloc(PAGE); + test_tcache_bytes_alloc(PAGE + 1); + + size_t large; + sz = sizeof(large); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + + test_tcache_bytes_alloc(large - 1); + test_tcache_bytes_alloc(large); + test_tcache_bytes_alloc(large + 1); +} + +TEST_BEGIN(test_tcache_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + + for (alloc_option = alloc_option_start; + alloc_option < alloc_option_end; + alloc_option++) { + for (dalloc_option = dalloc_option_start; + dalloc_option < dalloc_option_end; + dalloc_option++) { + test_tcache_max_impl(); + } + } +} +TEST_END + +int +main(void) { + return test(test_tcache_max); +} diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh new file mode 100644 index 0000000..4480d73 --- /dev/null +++ b/test/unit/tcache_max.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="tcache_max:1024" -- cgit v0.12 From ef6d51ed44ab864e6db8722a19758f67cc7b12d9 Mon Sep 17 00:00:00 2001 From: DC Date: Sun, 25 Oct 2020 15:17:24 +0000 Subject: DragonFlyBSD build support. 
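The recurring pattern in this change is to widen each FreeBSD-only guard so that it also covers DragonFly BSD. A minimal sketch of that shape (the pthread_np.h include is an assumption here, since the original include targets are not shown in the diff):

#include <pthread.h>
#if defined(__FreeBSD__) || defined(__DragonFly__)
#  include <pthread_np.h> /* assumed home of pthread_set_name_np() on both BSDs */
#endif

static void
set_bg_thread_name(void) {
#if defined(__FreeBSD__) || defined(__DragonFly__)
	/* Both BSDs spell the call pthread_set_name_np(), not pthread_setname_np(). */
	pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
#endif
}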
--- include/jemalloc/internal/jemalloc_internal_decls.h | 2 +- src/background_thread.c | 2 +- src/jemalloc.c | 2 +- src/prof_sys.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 32058ce..7d212c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -32,7 +32,7 @@ # include # endif # include -# ifdef __FreeBSD__ +# if defined(__FreeBSD__) || defined(__DragonFly__) # include # endif # include diff --git a/src/background_thread.c b/src/background_thread.c index a36836c..d4f96b1 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -506,7 +506,7 @@ background_thread_entry(void *ind_arg) { assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(__FreeBSD__) +#elif defined(__FreeBSD__) || defined(__DragonFly__) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 09b168c..2a791e1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -815,7 +815,7 @@ malloc_ncpus(void) { * is available, to avoid using more arenas than necessary. */ { -# if defined(__FreeBSD__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; diff --git a/src/prof_sys.c b/src/prof_sys.c index dddba4b..777ef1d 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -436,7 +436,7 @@ prof_dump_open_maps_impl() { int mfd; cassert(config_prof); -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__DragonFly__) mfd = prof_open_maps_internal("/proc/curproc/map"); #elif defined(_WIN32) mfd = -1; // Not implemented -- cgit v0.12 From 180b84315933b7d986fff7539eeb262eb44bc75d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 27 Oct 2020 12:42:23 -0700 Subject: Appveyor: fix 404 errors. It looks like the mirrors we were using no longer carry this package, but that it is installed by default and so no longer needs a remote mirror. --- .appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index f44868d..d31f9ae 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -31,7 +31,6 @@ install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -S mingw-w64-%CPU%-make build_script: - bash -c "autoconf" -- cgit v0.12 From d2d941017b8a62ee7d835ccfb7b34c54ce32e371 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 1 Nov 2020 20:52:56 +0000 Subject: MADV_DO[NOT]DUMP support equivalence on FreeBSD. --- configure.ac | 10 ++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ src/pages.c | 8 ++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index d55c0b8..ca5e2f1 100644 --- a/configure.ac +++ b/configure.ac @@ -2132,6 +2132,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + dnl Check for madvise(..., MADV_[NO]CORE). 
+ JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [ +#include +], [ + madvise((void *)0, 0, MADV_NOCORE); + madvise((void *)0, 0, MADV_CORE); +], [je_cv_madv_nocore]) + if test "x${je_cv_madv_nocore}" = "xyes" ; then + AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ]) + fi case "${host_cpu}" in arm*) ;; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7af28f7..5ea1a19 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -299,6 +299,11 @@ #undef JEMALLOC_MADVISE_DONTDUMP /* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#undef JEMALLOC_MADVISE_NOCORE + +/* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ diff --git a/src/pages.c b/src/pages.c index 05bbf72..59a03f2 100644 --- a/src/pages.c +++ b/src/pages.c @@ -413,8 +413,10 @@ bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_MADVISE_DONTDUMP +#if defined(JEMALLOC_MADVISE_DONTDUMP) return madvise(addr, size, MADV_DONTDUMP) != 0; +#elif defined(JEMALLOC_MADVISE_NOCORE) + return madvise(addr, size, MADV_NOCORE) != 0; #else return false; #endif @@ -424,8 +426,10 @@ bool pages_dodump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_MADVISE_DONTDUMP +#if defined(JEMALLOC_MADVISE_DONTDUMP) return madvise(addr, size, MADV_DODUMP) != 0; +#elif defined(JEMALLOC_MADVISE_NOCORE) + return madvise(addr, size, MADV_CORE) != 0; #else return false; #endif -- cgit v0.12 From 27ef02ca9a21f2e6a432e67dd3d2bafc8a04371f Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 23 Apr 2020 20:23:04 +0100 Subject: Android build fix proposal. These are detected at configure time while they are glibc specifics. the bionic equivalent is not api compatible and dlopen is restricted in this platform. --- configure.ac | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index ca5e2f1..1e6de8a 100644 --- a/configure.ac +++ b/configure.ac @@ -661,10 +661,11 @@ case "${host}" in *-*-bitrig*) abi="elf" ;; - *-*-linux-android) + *-*-linux-android*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + glibc="0" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -679,6 +680,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + glibc="1" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -2258,37 +2260,39 @@ fi dnl ============================================================================ dnl Check for glibc malloc hooks -JE_COMPILABLE([glibc malloc hook], [ -#include +if test "x$glibc" = "x1" ; then + JE_COMPILABLE([glibc malloc hook], [ + #include -extern void (* __free_hook)(void *ptr); -extern void *(* __malloc_hook)(size_t size); -extern void *(* __realloc_hook)(void *ptr, size_t size); + extern void (* __free_hook)(void *ptr); + extern void *(* __malloc_hook)(size_t size); + extern void *(* __realloc_hook)(void *ptr, size_t size); ], [ - void *ptr = 0L; - if (__malloc_hook) ptr = __malloc_hook(1); - if (__realloc_hook) ptr = __realloc_hook(ptr, 2); - if (__free_hook && ptr) __free_hook(ptr); + void *ptr = 0L; + if (__malloc_hook) ptr = __malloc_hook(1); + if (__realloc_hook) ptr = __realloc_hook(ptr, 2); + if (__free_hook && ptr) __free_hook(ptr); ], [je_cv_glibc_malloc_hook]) -if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then - if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) - wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" + if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" + fi fi -fi -JE_COMPILABLE([glibc memalign hook], [ -#include + JE_COMPILABLE([glibc memalign hook], [ + #include -extern void *(* __memalign_hook)(size_t alignment, size_t size); + extern void *(* __memalign_hook)(size_t alignment, size_t size); ], [ - void *ptr = 0L; - if (__memalign_hook) ptr = __memalign_hook(16, 7); + void *ptr = 0L; + if (__memalign_hook) ptr = __memalign_hook(16, 7); ], [je_cv_glibc_memalign_hook]) -if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then - if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) - wrap_syms="${wrap_syms} __memalign_hook" + if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + wrap_syms="${wrap_syms} __memalign_hook" + fi fi fi -- cgit v0.12 From 1b3ee75667dd7820808d35d16bfcebdd146be70a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 16:31:32 -0700 Subject: Add experimental.thread.activity_callback. This (experimental, undocumented) functionality can be used by users to track various statistics of interest at a finer level of granularity than the thread. --- include/jemalloc/internal/activity_callback.h | 23 +++++++++ include/jemalloc/internal/tsd.h | 5 ++ src/ctl.c | 35 ++++++++++++- src/peak_event.c | 17 +++++- test/unit/mallctl.c | 74 ++++++++++++++++++++++++++- 5 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 include/jemalloc/internal/activity_callback.h diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h new file mode 100644 index 0000000..6c2e84e --- /dev/null +++ b/include/jemalloc/internal/activity_callback.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H +#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H + +/* + * The callback to be executed "periodically", in response to some amount of + * allocator activity. 
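A usage sketch, mirroring the mallctl test added later in this change (the callback and function names are illustrative, config_stats must be enabled, and the thunk type comes from this internal header rather than the public API):

#include <stdint.h>
#include <jemalloc/jemalloc.h>
#include "jemalloc/internal/activity_callback.h"

static void
my_activity_cb(void *uctx, uint64_t allocated, uint64_t deallocated) {
	/* E.g. track a per-thread high-water mark; uctx is caller-supplied. */
	(void)uctx; (void)allocated; (void)deallocated;
}

static int
install_activity_cb(void *uctx) {
	activity_callback_thunk_t thunk = {&my_activity_cb, uctx};
	/* Writing a {NULL, NULL} thunk later uninstalls the callback again. */
	return mallctl("experimental.thread.activity_callback", NULL, NULL,
	    &thunk, sizeof(thunk));
}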
+ * + * This callback need not be computing any sort of peak (although that's the + * intended first use case), but we drive it from the peak counter, so it's + * keeps things tidy to keep it here. + * + * The calls to this thunk get driven by the peak_event module. + */ +#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL} +typedef void (*activity_callback_t)(void *uctx, uint64_t allocated, + uint64_t deallocated); +typedef struct activity_callback_thunk_s activity_callback_thunk_t; +struct activity_callback_thunk_s { + activity_callback_t callback; + void *uctx; +}; + +#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5ac85e1..6076419 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TSD_H #define JEMALLOC_INTERNAL_TSD_H +#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" @@ -82,6 +83,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -112,6 +115,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* tsd_link */ {NULL}, \ /* in_hook */ false, \ /* peak */ PEAK_INITIALIZER, \ + /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, diff --git a/src/ctl.c b/src/ctl.c index 874aaac..d5dd1d1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -264,6 +264,7 @@ CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) @@ -712,6 +713,11 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)} }; +static const ctl_named_node_t experimental_thread_node[] = { + {NAME("activity_callback"), + CTL(experimental_thread_activity_callback)} +}; + static const ctl_named_node_t experimental_utilization_node[] = { {NAME("query"), CTL(experimental_utilization_query)}, {NAME("batch_query"), CTL(experimental_utilization_batch_query)} @@ -738,7 +744,8 @@ static const ctl_named_node_t experimental_node[] = { {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)} + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, + {NAME("thread"), CHILD(named, experimental_thread)} }; static const ctl_named_node_t root_node[] = { @@ -3428,6 +3435,32 @@ label_return: return ret; } +static int +experimental_thread_activity_callback_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!config_stats) { + return ENOENT; + } + + activity_callback_thunk_t t_old = tsd_activity_callback_thunk_get(tsd); + READ(t_old, activity_callback_thunk_t); + + if (newp != NULL) { + /* + * This initialization is unnecessary. 
If it's omitted, though, + * clang gets confused and warns on the subsequent use of t_new. + */ + activity_callback_thunk_t t_new = {NULL, NULL}; + WRITE(t_new, activity_callback_thunk_t); + tsd_activity_callback_thunk_set(tsd, t_new); + } + ret = 0; +label_return: + return ret; +} + /* * Output six memory utilization entries for an input pointer, the first one of * type (void *) and the remaining five of type size_t, describing the following diff --git a/src/peak_event.c b/src/peak_event.c index 79d91e0..4093fbc 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -1,9 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/peak.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/peak.h" + /* * Update every 64K by default. We're not exposing this as a configuration * option for now; we don't want to bind ourselves too tightly to any particular @@ -21,6 +23,17 @@ peak_event_update(tsd_t *tsd) { peak_update(peak, alloc, dalloc); } +static void +peak_event_activity_callback(tsd_t *tsd) { + activity_callback_thunk_t *thunk = tsd_activity_callback_thunkp_get( + tsd); + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + if (thunk->callback != NULL) { + thunk->callback(thunk->uctx, alloc, dalloc); + } +} + /* Set current state to zero. */ void peak_event_zero(tsd_t *tsd) { @@ -49,6 +62,7 @@ peak_alloc_postponed_event_wait(tsd_t *tsd) { void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { peak_event_update(tsd); + peak_event_activity_callback(tsd); } uint64_t @@ -64,4 +78,5 @@ peak_dalloc_postponed_event_wait(tsd_t *tsd) { void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { peak_event_update(tsd); + peak_event_activity_callback(tsd); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 278bd09..d4e2621 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1030,6 +1030,77 @@ TEST_BEGIN(test_thread_peak) { } TEST_END +typedef struct activity_test_data_s activity_test_data_t; +struct activity_test_data_s { + uint64_t obtained_alloc; + uint64_t obtained_dalloc; +}; + +static void +activity_test_callback(void *uctx, uint64_t alloc, uint64_t dalloc) { + activity_test_data_t *test_data = (activity_test_data_t *)uctx; + test_data->obtained_alloc = alloc; + test_data->obtained_dalloc = dalloc; +} + +TEST_BEGIN(test_thread_activity_callback) { + test_skip_if(!config_stats); + + const size_t big_size = 10 * 1024 * 1024; + void *ptr; + int err; + size_t sz; + + uint64_t *allocatedp; + uint64_t *deallocatedp; + sz = sizeof(allocatedp); + err = mallctl("thread.allocatedp", &allocatedp, &sz, NULL, 0); + assert_d_eq(0, err, ""); + err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); + assert_d_eq(0, err, ""); + + activity_callback_thunk_t old_thunk = {(activity_callback_t)111, + (void *)222}; + + activity_test_data_t test_data = {333, 444}; + activity_callback_thunk_t new_thunk = + {&activity_test_callback, &test_data}; + + sz = sizeof(old_thunk); + err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, + &new_thunk, sizeof(new_thunk)); + assert_d_eq(0, err, ""); + + expect_true(old_thunk.callback == NULL, "Callback already installed"); + expect_true(old_thunk.uctx == NULL, "Callback data already installed"); + + ptr = mallocx(big_size, 0); + expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); + 
expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); + + free(ptr); + expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); + expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); + + sz = sizeof(old_thunk); + new_thunk = (activity_callback_thunk_t){ NULL, NULL }; + err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, + &new_thunk, sizeof(new_thunk)); + assert_d_eq(0, err, ""); + + expect_true(old_thunk.callback == &activity_test_callback, ""); + expect_true(old_thunk.uctx == &test_data, ""); + + /* Inserting NULL should have turned off tracking. */ + test_data.obtained_alloc = 333; + test_data.obtained_dalloc = 444; + ptr = mallocx(big_size, 0); + free(ptr); + expect_u64_eq(333, test_data.obtained_alloc, ""); + expect_u64_eq(444, test_data.obtained_dalloc, ""); +} +TEST_END + int main(void) { return test( @@ -1063,5 +1134,6 @@ main(void) { test_hooks, test_hooks_exhaustion, test_thread_idle, - test_thread_peak); + test_thread_peak, + test_thread_activity_callback); } -- cgit v0.12 From c9757d9e3ba6b53e7f4ecbe9c1872a74df51fe4b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 15:05:48 -0700 Subject: HPA: Don't disable shards that were never started. --- src/pa.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pa.c b/src/pa.c index 825b10a..59873c1 100644 --- a/src/pa.c +++ b/src/pa.c @@ -74,19 +74,26 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); - sec_disable(tsdn, &shard->hpa_sec); + if (shard->ever_used_hpa) { + sec_disable(tsdn, &shard->hpa_sec); + } } void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); - sec_flush(tsdn, &shard->hpa_sec); + if (shard->ever_used_hpa) { + sec_flush(tsdn, &shard->hpa_sec); + } } void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); + if (shard->ever_used_hpa) { + sec_flush(tsdn, &shard->hpa_sec); + } } static pai_t * -- cgit v0.12 From 03a604711113c9d883242291ca11b77c83ba4c75 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 29 Oct 2020 05:11:16 -0700 Subject: Edata cache small: rewrite. In previous designs, this was intended to be a sort of cache that couldn't fail. In the current design, we want to use it just as a contention reduction mechanism. Rewrite it with those goals in mind. --- include/jemalloc/internal/edata_cache.h | 39 +++--- src/edata_cache.c | 132 ++++++++++++++------ test/unit/edata_cache.c | 206 ++++++++++++++++++++++++++++---- 3 files changed, 301 insertions(+), 76 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 02685c8..f7d0c31 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -4,6 +4,16 @@ #include "jemalloc/internal/base.h" /* + * Public for tests. When we go to the fallback when the small cache is empty, + * we grab up to 8 items (grabbing less only if the fallback is exhausted). + * When we exceed 16, we flush. This caps the maximum memory lost per cache to + * 16 * sizeof(edata_t), a max of 2k on architectures where the edata_t is 128 + * bytes. + */ +#define EDATA_CACHE_SMALL_MAX 16 +#define EDATA_CACHE_SMALL_FILL 8 + +/* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to * the underlying extents they describe). 
The contents of returned edata_t * objects are garbage and cannot be relied upon. @@ -25,32 +35,23 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); +/* + * An edata_cache_small is like an edata_cache, but it relies on external + * synchronization and avoids first-fit strategies. + */ + typedef struct edata_cache_small_s edata_cache_small_t; struct edata_cache_small_s { edata_list_inactive_t list; size_t count; edata_cache_t *fallback; + bool disabled; }; -/* - * An edata_cache_small is like an edata_cache, but it relies on external - * synchronization and avoids first-fit strategies. You can call "prepare" to - * acquire at least num edata_t objects, and then "finish" to flush all - * excess ones back to their fallback edata_cache_t. Once they have been - * acquired, they can be allocated without failing (and in fact, this is - * required -- it's not permitted to attempt to get an edata_t without first - * preparing for it). - */ - void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); - -/* Returns whether or not an error occurred. */ -bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num); -edata_t *edata_cache_small_get(edata_cache_small_t *ecs); - -void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata); -void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num); +edata_t *edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs); +void edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, + edata_t *edata); +void edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/src/edata_cache.c b/src/edata_cache.c index d899ce8..ecfce41 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -27,8 +27,7 @@ edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); - size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); - atomic_store_zu(&edata_cache->count, count - 1, ATOMIC_RELAXED); + atomic_load_sub_store_zu(&edata_cache->count, 1); malloc_mutex_unlock(tsdn, &edata_cache->mtx); return edata; } @@ -37,8 +36,7 @@ void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_avail_insert(&edata_cache->avail, edata); - size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); - atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED); + atomic_load_add_store_zu(&edata_cache->count, 1); malloc_mutex_unlock(tsdn, &edata_cache->mtx); } @@ -62,48 +60,110 @@ edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { edata_list_inactive_init(&ecs->list); ecs->count = 0; ecs->fallback = fallback; + ecs->disabled = false; } -edata_t * -edata_cache_small_get(edata_cache_small_t *ecs) { - assert(ecs->count > 0); - edata_t *edata = edata_list_inactive_first(&ecs->list); - assert(edata != NULL); - edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; - return edata; +static void +edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn, + edata_cache_small_t *ecs) { + assert(ecs->count == 0); + edata_t *edata; + malloc_mutex_lock(tsdn, &ecs->fallback->mtx); + while (ecs->count < EDATA_CACHE_SMALL_FILL) { + edata = 
edata_avail_first(&ecs->fallback->avail); + if (edata == NULL) { + break; + } + edata_avail_remove(&ecs->fallback->avail, edata); + edata_list_inactive_append(&ecs->list, edata); + ecs->count++; + atomic_load_sub_store_zu(&ecs->fallback->count, 1); + } + malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); } -void -edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { - assert(edata != NULL); - edata_list_inactive_append(&ecs->list, edata); - ecs->count++; -} +edata_t * +edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_EDATA_CACHE, 0); + + if (ecs->disabled) { + assert(ecs->count == 0); + assert(edata_list_inactive_first(&ecs->list) == NULL); + return edata_cache_get(tsdn, ecs->fallback); + } -bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num) { - while (ecs->count < num) { + edata_t *edata = edata_list_inactive_first(&ecs->list); + if (edata != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + ecs->count--; + return edata; + } + /* Slow path; requires synchronization. */ + edata_cache_small_try_fill_from_fallback(tsdn, ecs); + edata = edata_list_inactive_first(&ecs->list); + if (edata != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + ecs->count--; + } else { /* - * Obviously, we can be smarter here and batch the locking that - * happens inside of edata_cache_get. But for now, something - * quick-and-dirty is fine. + * Slowest path (fallback was also empty); allocate something + * new. */ - edata_t *edata = edata_cache_get(tsdn, ecs->fallback); - if (edata == NULL) { - return true; - } - ql_elm_new(edata, ql_link_inactive); - edata_cache_small_put(ecs, edata); + edata = base_alloc_edata(tsdn, ecs->fallback->base); } - return false; + return edata; } -void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num) { - while (ecs->count > num) { - /* Same deal here -- we should be batching. */ - edata_t *edata = edata_cache_small_get(ecs); +static void +edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { + /* + * You could imagine smarter cache management policies (like + * only flushing down to some threshold in anticipation of + * future get requests). But just flushing everything provides + * a good opportunity to defrag too, and lets us share code between the + * flush and disable pathways. + */ + edata_t *edata; + size_t nflushed = 0; + malloc_mutex_lock(tsdn, &ecs->fallback->mtx); + while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + edata_avail_insert(&ecs->fallback->avail, edata); + nflushed++; + } + atomic_load_add_store_zu(&ecs->fallback->count, ecs->count); + malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); + assert(nflushed == ecs->count); + ecs->count = 0; +} + +void +edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_EDATA_CACHE, 0); + + if (ecs->disabled) { + assert(ecs->count == 0); + assert(edata_list_inactive_first(&ecs->list) == NULL); edata_cache_put(tsdn, ecs->fallback, edata); + return; } + + /* + * Prepend rather than append, to do LIFO ordering in the hopes of some + * cache locality. 
+ */ + edata_list_inactive_prepend(&ecs->list, edata); + ecs->count++; + if (ecs->count > EDATA_CACHE_SMALL_MAX) { + assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1); + edata_cache_small_flush_all(tsdn, ecs); + } +} + +void +edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) { + edata_cache_small_flush_all(tsdn, ecs); + ecs->disabled = true; } diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 22c9dcb..9a5d14b 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -47,37 +47,198 @@ TEST_BEGIN(test_edata_cache) { } TEST_END -TEST_BEGIN(test_edata_cache_small) { +TEST_BEGIN(test_edata_cache_small_simple) { edata_cache_t ec; edata_cache_small_t ecs; test_edata_cache_init(&ec); edata_cache_small_init(&ecs, &ec); - bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2); - assert_false(err, ""); - assert_zu_eq(ecs.count, 2, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_t *ed1 = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(ed1, ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed1 = edata_cache_small_get(&ecs); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_t *ed2 = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(ed2, ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed2 = edata_cache_small_get(&ecs); - assert_zu_eq(ecs.count, 0, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_cache_small_put(TSDN_NULL, &ecs, ed1); + expect_zu_eq(ecs.count, 1, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(&ecs, ed1); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_cache_small_put(TSDN_NULL, &ecs, ed2); + expect_zu_eq(ecs.count, 2, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(&ecs, ed2); - assert_zu_eq(ecs.count, 2, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + /* LIFO ordering. */ + expect_ptr_eq(ed2, edata_cache_small_get(TSDN_NULL, &ecs), ""); + expect_zu_eq(ecs.count, 1, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_finish(TSDN_NULL, &ecs, 1); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); + expect_ptr_eq(ed1, edata_cache_small_get(TSDN_NULL, &ecs), ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_fill) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + edata_t *allocs[EDATA_CACHE_SMALL_FILL * 2]; + + /* + * If the fallback cache can't satisfy the request, we shouldn't do + * extra allocations until compelled to. Put half the fill goal in the + * fallback. 
+ */ + for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + } + for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + edata_cache_put(TSDN_NULL, &ec, allocs[i]); + } + expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2 - 1, ecs.count, + "Should have grabbed all edatas available but no more."); + + for (int i = 1; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + + /* When forced, we should alloc from the base. */ + edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(edata, ""); + expect_zu_eq(0, ecs.count, "Allocated more than necessary"); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Allocated more than necessary"); + + /* + * We should correctly fill in the common case where the fallback isn't + * exhausted, too. + */ + for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(allocs[i], ""); + } + for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + edata_cache_put(TSDN_NULL, &ec, allocs[i]); + } + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { + expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { + expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_flush) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + edata_t *allocs[2 * EDATA_CACHE_SMALL_MAX + 2]; + for (int i = 0; i < 2 * EDATA_CACHE_SMALL_MAX + 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(allocs[i], ""); + } + for (int i = 0; i < EDATA_CACHE_SMALL_MAX; i++) { + edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); + expect_zu_eq(i + 1, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + } + edata_cache_small_put(TSDN_NULL, &ecs, allocs[EDATA_CACHE_SMALL_MAX]); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + for (int i = EDATA_CACHE_SMALL_MAX + 1; + i < 2 * EDATA_CACHE_SMALL_MAX + 1; i++) { + edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); + expect_zu_eq(i - EDATA_CACHE_SMALL_MAX, ecs.count, ""); + 
expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + } + edata_cache_small_put(TSDN_NULL, &ecs, allocs[2 * EDATA_CACHE_SMALL_MAX + 1]); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(2 * EDATA_CACHE_SMALL_MAX + 2, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_disable) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + for (int i = 0; i < EDATA_CACHE_SMALL_FILL; i++) { + edata_t *edata = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(edata, ""); + edata_cache_small_put(TSDN_NULL, &ecs, edata); + } + + expect_zu_eq(EDATA_CACHE_SMALL_FILL, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + edata_cache_small_disable(TSDN_NULL, &ecs); + + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); + + edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabled ecs should forward on get"); + + edata_cache_small_put(TSDN_NULL, &ecs, edata); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabled ecs should forward on put"); test_edata_cache_destroy(&ec); } @@ -87,5 +248,8 @@ int main(void) { return test( test_edata_cache, - test_edata_cache_small); + test_edata_cache_small_simple, + test_edata_cache_fill, + test_edata_cache_flush, + test_edata_cache_disable); } -- cgit v0.12 From 589638182ae58ae8031eac2cd9ba9d5b05783b42 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 14:43:43 -0700 Subject: Use the edata_cache_small_t in the HPA. --- include/jemalloc/internal/hpa.h | 11 ++++++++++- include/jemalloc/internal/hpa_central.h | 2 +- src/hpa.c | 23 ++++++++++++++++++----- src/hpa_central.c | 10 +++++----- src/pa.c | 3 ++- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 24c6856..1cef6e5 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -24,6 +24,9 @@ struct hpa_s { /* * This edata cache is the global one that we use for new allocations in * growing; practically, it comes from a0. + * + * We don't use an edata_cache_small in front of this, since we expect a + * small finite number of allocations from it. */ edata_cache_t *edata_cache; geom_grow_t geom_grow; @@ -50,7 +53,7 @@ struct hpa_shard_s { * from a pageslab. The pageslab itself comes from the centralized * allocator, and so will use its edata_cache. */ - edata_cache_t *edata_cache; + edata_cache_small_t ecs; hpa_t *hpa; psset_t psset; @@ -86,6 +89,12 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, size_t large_min); +/* + * Notify the shard that we won't use it for allocations much longer. Due to + * the possibility of races, we don't actually prevent allocations; just flush + * and disable the embedded edata_cache_small. 
+ */ +void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h index b90ca41..8659f71 100644 --- a/include/jemalloc/internal/hpa_central.h +++ b/include/jemalloc/internal/hpa_central.h @@ -9,7 +9,7 @@ struct hpa_central_s { /* The emap we use for metadata operations. */ emap_t *emap; - edata_cache_t *edata_cache; + edata_cache_small_t ecs; eset_t eset; size_t sn_next; diff --git a/src/hpa.c b/src/hpa.c index f49aa2b..b329dbb 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -64,7 +64,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, return true; } - shard->edata_cache = edata_cache; + assert(edata_cache != NULL); + edata_cache_small_init(&shard->ecs, edata_cache); shard->hpa = hpa; psset_init(&shard->psset); shard->ps_goal = ps_goal; @@ -201,13 +202,14 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { assert(size <= shard->ps_alloc_max); bool err; - edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } edata_arena_ind_set(edata, shard->ind); - malloc_mutex_lock(tsdn, &shard->mtx); err = psset_alloc_reuse(&shard->psset, edata, size); malloc_mutex_unlock(tsdn, &shard->mtx); if (!err) { @@ -229,7 +231,11 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { shard->ps_goal); if (grow_edata == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - edata_cache_put(tsdn, shard->edata_cache, edata); + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + return NULL; } edata_arena_ind_set(grow_edata, shard->ind); @@ -351,9 +357,9 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { malloc_mutex_lock(tsdn, &shard->mtx); edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); - edata_cache_put(tsdn, shard->edata_cache, edata); if (evicted_ps != NULL) { /* @@ -388,6 +394,13 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { } void +hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_lock(tsdn, &shard->mtx); + edata_cache_small_disable(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* * By the time we're here, the arena code should have dalloc'd all the diff --git a/src/hpa_central.c b/src/hpa_central.c index a1895c8..346d942 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -7,7 +7,7 @@ void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, emap_t *emap) { central->emap = emap; - central->edata_cache = edata_cache; + edata_cache_small_init(¢ral->ecs, edata_cache); eset_init(¢ral->eset, extent_state_dirty); central->sn_next = 0; } @@ -19,7 +19,7 @@ hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, static edata_t * hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, size_t size) { - edata_t *trail = edata_cache_get(tsdn, central->edata_cache); + edata_t *trail = edata_cache_small_get(tsdn, ¢ral->ecs); if (trail == NULL) { return NULL; } @@ -34,7 +34,7 @@ hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, bool err = emap_split_prepare(tsdn, 
central->emap, &prepare, edata, size, trail, cursize - size); if (err) { - edata_cache_put(tsdn, central->edata_cache, trail); + edata_cache_small_put(tsdn, ¢ral->ecs, trail); return NULL; } emap_lock_edata2(tsdn, central->emap, edata, trail); @@ -102,7 +102,7 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, assert(edata_base_get(edata) == edata_addr_get(edata)); assert(edata_size_get(edata) >= size); assert(edata_arena_ind_get(edata) - == base_ind_get(central->edata_cache->base)); + == base_ind_get(central->ecs.fallback->base)); assert(edata_is_head_get(edata)); assert(edata_state_get(edata) == extent_state_active); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); @@ -173,7 +173,7 @@ hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, edata_size_set(a, edata_size_get(a) + edata_size_get(b)); emap_merge_commit(tsdn, central->emap, &prepare, a, b); emap_unlock_edata2(tsdn, central->emap, a, b); - edata_cache_put(tsdn, central->edata_cache, b); + edata_cache_small_put(tsdn, ¢ral->ecs, b); } void diff --git a/src/pa.c b/src/pa.c index 59873c1..aee7bcd 100644 --- a/src/pa.c +++ b/src/pa.c @@ -76,6 +76,7 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); if (shard->ever_used_hpa) { sec_disable(tsdn, &shard->hpa_sec); + hpa_shard_disable(tsdn, &shard->hpa_shard); } } @@ -89,10 +90,10 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { - sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { sec_flush(tsdn, &shard->hpa_sec); + hpa_shard_disable(tsdn, &shard->hpa_shard); } } -- cgit v0.12 From b3c5690b7e982c7343d22592f9a43d0e2857defe Mon Sep 17 00:00:00 2001 From: "Issam E. Maghni" Date: Mon, 9 Nov 2020 12:28:56 -0500 Subject: Update config.{guess,sub} to 2020-11-07@77632d9 --- build-aux/config.guess | 1008 +++++++++++-------- build-aux/config.sub | 2624 ++++++++++++++++++++++++------------------------ 2 files changed, 1942 insertions(+), 1690 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 2e9ad7f..0fc11ed 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2020 Free Software Foundation, Inc. -timestamp='2016-10-02' +timestamp='2020-11-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2016-10-02' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,19 +27,19 @@ timestamp='2016-10-02' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # # Please send patches to . -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. 
-Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2020 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -84,8 +84,6 @@ if test $# != 0; then exit 1 fi -trap 'exit 1' 1 2 15 - # CC_FOR_BUILD -- compiler used by this script. Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a @@ -96,66 +94,77 @@ trap 'exit 1' 1 2 15 # Portable tmp directory creation inspired by the Autoconf team. -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # prevent multiple calls if $tmp is already set + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039 + { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$driver" + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} # This is needed to find uname on a Pyramid OSx when run in the BSD universe. 
# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then +if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown +UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown +UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown +UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown -case "${UNAME_SYSTEM}" in +case "$UNAME_SYSTEM" in Linux|GNU|GNU/*) # If the system lacks a compiler, then just pick glibc. # We could probably try harder. LIBC=gnu - eval $set_cc_for_build - cat <<-EOF > $dummy.c + set_cc_for_build + cat <<-EOF > "$dummy.c" #include #if defined(__UCLIBC__) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc #else + #include + #ifdef __DEFINED_va_list + LIBC=musl + #else LIBC=gnu #endif + #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" ;; esac # Note: order is significant - the case branches are not exclusive. -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -168,31 +177,32 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ - /sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || \ - echo unknown)` - case "${UNAME_MACHINE_ARCH}" in + UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)) + case "$UNAME_MACHINE_ARCH" in + aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; earmv*) - arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` - endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` - machine=${arch}${endian}-unknown + arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') + endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') + machine="${arch}${endian}"-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently (or will in the future) and ABI. - case "${UNAME_MACHINE_ARCH}" in + case "$UNAME_MACHINE_ARCH" in earm*) os=netbsdelf ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build + set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then @@ -208,10 +218,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ;; esac # Determine ABI tags. 
- case "${UNAME_MACHINE_ARCH}" in + case "$UNAME_MACHINE_ARCH" in earm*) expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") ;; esac # The OS release @@ -219,60 +229,75 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in + case "$UNAME_VERSION" in Debian*) release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` + release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}${abi}" + echo "$machine-${os}${release}${abi-}" exit ;; *:Bitrig:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" exit ;; *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" exit ;; *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" exit ;; *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" exit ;; *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + *:OS108:*:*) + echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" exit ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:Sortix:*:*) - echo ${UNAME_MACHINE}-unknown-sortix + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Twizzler:*:*) + echo "$UNAME_MACHINE"-unknown-twizzler + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. 
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) case "$ALPHA_CPU_TYPE" in "EV4 (21064)") UNAME_MACHINE=alpha ;; @@ -310,28 +335,19 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? - echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos + echo "$UNAME_MACHINE"-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos + echo "$UNAME_MACHINE"-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition @@ -343,7 +359,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} + echo arm-acorn-riscix"$UNAME_RELEASE" exit ;; arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos @@ -353,7 +369,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then + if test "$( (/bin/universe) 2>/dev/null)" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd @@ -366,28 +382,28 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in + case $(/usr/bin/uname -p) in sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} + echo i386-pc-auroraux"$UNAME_RELEASE" exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build + set_cc_for_build SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null @@ -395,40 +411,40 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in SUN_ARCH=x86_64 fi fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in + case "$(/usr/bin/arch -k)" in Series*|S4*) - UNAME_RELEASE=`uname -v` + UNAME_RELEASE=$(uname -v) ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" exit ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" exit ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 - case "`/bin/arch`" in + UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "$(/bin/arch)" in sun3) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" ;; sun4) - echo sparc-sun-sunos${UNAME_RELEASE} + echo sparc-sun-sunos"$UNAME_RELEASE" ;; esac exit ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} + echo sparc-auspex-sunos"$UNAME_RELEASE" exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not @@ -439,44 +455,44 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} + echo m68k-milan-mint"$UNAME_RELEASE" exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} + echo m68k-hades-mint"$UNAME_RELEASE" exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} + echo m68k-unknown-mint"$UNAME_RELEASE" exit ;; m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} + echo m68k-apple-machten"$UNAME_RELEASE" exit ;; powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} + echo powerpc-apple-machten"$UNAME_RELEASE" exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} + echo mips-dec-ultrix"$UNAME_RELEASE" exit ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} + echo vax-dec-ultrix"$UNAME_RELEASE" exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} + echo clipper-intergraph-clix"$UNAME_RELEASE" exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { @@ -485,23 +501,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && + SYSTEM_NAME=$("$dummy" "$dummyarg") && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} + echo mips-mips-riscos"$UNAME_RELEASE" exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax @@ -526,18 +542,18 @@ EOF exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + UNAME_PROCESSOR=$(/usr/bin/uname -p) + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x then - echo m88k-dg-dgux${UNAME_RELEASE} + echo m88k-dg-dgux"$UNAME_RELEASE" else - echo m88k-dg-dguxbcs${UNAME_RELEASE} + echo m88k-dg-dguxbcs"$UNAME_RELEASE" fi else - echo 
i586-dg-dgux${UNAME_RELEASE} + echo i586-dg-dgux"$UNAME_RELEASE" fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) @@ -554,26 +570,26 @@ EOF echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if test -x /usr/bin/oslevel ; then + IBM_REV=$(/usr/bin/oslevel) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include main() @@ -584,7 +600,7 @@ EOF exit(0); } EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") then echo "$SYSTEM_NAME" else @@ -597,28 +613,28 @@ EOF fi exit ;; *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi - if [ -x /usr/bin/lslpp ] ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + if test -x /usr/bin/lslpp ; then + IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx @@ -633,28 +649,28 @@ EOF echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) 
HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in + if test -x /usr/bin/getconf; then + sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) + sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) + case "$sc_cpu_version" in 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in + case "$sc_kernel_bits" in 32) HP_ARCH=hppa2.0n ;; 64) HP_ARCH=hppa2.0w ;; '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #define _HPUX_SOURCE #include @@ -687,13 +703,13 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = hppa2.0w ] + if test "$HP_ARCH" = hppa2.0w then - eval $set_cc_for_build + set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler @@ -712,15 +728,15 @@ EOF HP_ARCH=hppa64 fi fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" exit ;; ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + echo ia64-hp-hpux"$HPUX_REV" exit ;; 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include int main () @@ -745,11 +761,11 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) @@ -758,17 +774,17 @@ EOF *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk + if test -x /usr/sbin/sysversion ; then + echo "$UNAME_MACHINE"-unknown-osf1mk else - echo ${UNAME_MACHINE}-unknown-osf1 + echo "$UNAME_MACHINE"-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) @@ -793,130 +809,123 @@ EOF echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 
's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" exit ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} + echo sparc-unknown-bsdi"$UNAME_RELEASE" exit ;; *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=$(uname -p) + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi + else + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf + fi exit ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` - case ${UNAME_PROCESSOR} in + UNAME_PROCESSOR=$(/usr/bin/uname -p) + case "$UNAME_PROCESSOR" in amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin + echo "$UNAME_MACHINE"-pc-cygwin exit ;; *:MINGW64*:*) - echo ${UNAME_MACHINE}-pc-mingw64 + echo "$UNAME_MACHINE"-pc-mingw64 exit ;; *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 + echo "$UNAME_MACHINE"-pc-mingw32 exit ;; *:MSYS*:*) - echo ${UNAME_MACHINE}-pc-msys - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. 
- echo ${UNAME_MACHINE}-mingw32 + echo "$UNAME_MACHINE"-pc-msys exit ;; i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 + echo "$UNAME_MACHINE"-pc-pw32 exit ;; *:Interix*:*) - case ${UNAME_MACHINE} in + case "$UNAME_MACHINE" in x86) - echo i586-pc-interix${UNAME_RELEASE} + echo i586-pc-interix"$UNAME_RELEASE" exit ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} + echo x86_64-unknown-interix"$UNAME_RELEASE" exit ;; IA64) - echo ia64-unknown-interix${UNAME_RELEASE} + echo ia64-unknown-interix"$UNAME_RELEASE" exit ;; esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit ;; i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin + echo "$UNAME_MACHINE"-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin + echo x86_64-pc-cygwin exit ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; @@ -927,140 +936,178 @@ EOF esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" 
= 0 ; then LIBC=gnulibc1 ; fi - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arc:Linux:*:* | arceb:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arm*:Linux:*:*) - eval $set_cc_for_build + set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi else - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; e2k:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" #undef CPU - #undef ${UNAME_MACHINE} - #undef ${UNAME_MACHINE}el + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=${UNAME_MACHINE}el + MIPS_ENDIAN=el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=${UNAME_MACHINE} + MIPS_ENDIAN= #else - CPU= + MIPS_ENDIAN= #endif #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; mips64el:Linux:*:*) - echo 
${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; openrisc*:Linux:*:*) - echo or1k-unknown-linux-${LIBC} + echo or1k-unknown-linux-"$LIBC" exit ;; or32:Linux:*:* | or1k*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-${LIBC} + echo sparc-unknown-linux-"$LIBC" exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-${LIBC} + echo hppa64-unknown-linux-"$LIBC" exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; - PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; - *) echo hppa-unknown-linux-${LIBC} ;; + case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-${LIBC} + echo powerpc64-unknown-linux-"$LIBC" exit ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-${LIBC} + echo powerpc-unknown-linux-"$LIBC" exit ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-${LIBC} + echo powerpc64le-unknown-linux-"$LIBC" exit ;; ppcle:Linux:*:*) - echo powerpcle-unknown-linux-${LIBC} + echo powerpcle-unknown-linux-"$LIBC" exit ;; riscv32:Linux:*:* | riscv64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-${LIBC} + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI="$LIBC"x32 + fi + fi + echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1074,51 +1121,51 @@ EOF # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx + echo "$UNAME_MACHINE"-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop + echo "$UNAME_MACHINE"-unknown-stop exit ;; i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos + echo "$UNAME_MACHINE"-unknown-atheos exit ;; i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable + echo "$UNAME_MACHINE"-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} + echo i386-unknown-lynxos"$UNAME_RELEASE" exit ;; i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp + echo "$UNAME_MACHINE"-pc-msdosdjgpp exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + i*86:*:4.*:*) + UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. - case `/bin/uname -X | grep "^Machine"` in + case $(/bin/uname -X | grep "^Machine") in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 @@ -1126,9 +1173,9 @@ EOF && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv32 + echo "$UNAME_MACHINE"-pc-sysv32 fi exit ;; pc:*:*:*) @@ -1148,9 +1195,9 @@ EOF exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) @@ -1168,41 +1215,41 @@ EOF 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} + echo m68k-unknown-lynxos"$UNAME_RELEASE" exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} + echo sparc-unknown-lynxos"$UNAME_RELEASE" exit ;; rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} + echo rs6000-unknown-lynxos"$UNAME_RELEASE" exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} + echo powerpc-unknown-lynxos"$UNAME_RELEASE" exit ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} + echo mips-dde-sysv"$UNAME_RELEASE" exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 @@ -1212,8 +1259,8 @@ EOF exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + echo "$UNAME_MACHINE"-sni-sysv4 else echo ns32k-sni-sysv fi @@ -1233,23 +1280,23 @@ EOF exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos + echo "$UNAME_MACHINE"-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} + echo m68k-apple-aux"$UNAME_RELEASE" exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + if test -d /usr/nec; then + echo mips-nec-sysv"$UNAME_RELEASE" else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv"$UNAME_RELEASE" fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
@@ -1268,80 +1315,97 @@ EOF echo x86_64-unknown-haiku exit ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} + echo sx4-nec-superux"$UNAME_RELEASE" exit ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} + echo sx5-nec-superux"$UNAME_RELEASE" exit ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} + echo sx6-nec-superux"$UNAME_RELEASE" exit ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} + echo sx7-nec-superux"$UNAME_RELEASE" exit ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} + echo sx8-nec-superux"$UNAME_RELEASE" exit ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} + echo sx8r-nec-superux"$UNAME_RELEASE" exit ;; SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux${UNAME_RELEASE} + echo sxace-nec-superux"$UNAME_RELEASE" exit ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} + echo powerpc-apple-rhapsody"$UNAME_RELEASE" exit ;; *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + arm64:Darwin:*:*) + echo aarch64-apple-darwin"$UNAME_RELEASE" exit ;; *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - eval $set_cc_for_build - if test "$UNAME_PROCESSOR" = unknown ; then - UNAME_PROCESSOR=powerpc + UNAME_PROCESSOR=$(uname -p) + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. + CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build fi - if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc fi elif test "$UNAME_PROCESSOR" = i386 ; then - # Avoid executing cc on OS X 10.9, as it ships with a stub - # that puts up a graphical alert prompting to install - # developer tools. Any system running Mac OS X 10.7 or - # later (Darwin 11 and later) is required to have a 64-bit - # processor. This is not true of the ARM version of Darwin - # that Apple uses in portable devices. 
- UNAME_PROCESSOR=x86_64 + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE fi - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` + UNAME_PROCESSOR=$(uname -p) if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-?:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk${UNAME_RELEASE} + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" exit ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} + echo nse-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux @@ -1350,18 +1414,19 @@ EOF echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. + # shellcheck disable=SC2154 if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi - echo ${UNAME_MACHINE}-unknown-plan9 + echo "$UNAME_MACHINE"-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 @@ -1382,14 +1447,14 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux"$UNAME_RELEASE" exit ;; *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + case "$UNAME_MACHINE" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; @@ -1398,32 +1463,190 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" exit ;; i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos + echo "$UNAME_MACHINE"-pc-rdos exit ;; i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros + echo "$UNAME_MACHINE"-pc-aros exit ;; x86_64:VMkernel:*:*) - echo ${UNAME_MACHINE}-unknown-esx + echo "$UNAME_MACHINE"-unknown-esx exit ;; amd64:Isilon\ OneFS:*:*) echo x86_64-unknown-onefs exit ;; + *:Unleashed:*:*) + echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" + exit ;; +esac + +# No uname command or uname output not recognized. +set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". 
Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null); + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 <&2 </dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` +uname -m = $( (uname -m) 2>/dev/null || echo unknown) +uname -r = $( (uname -r) 2>/dev/null || echo unknown) +uname -s = $( (uname -s) 2>/dev/null || echo unknown) +uname -v = $( (uname -v) 2>/dev/null || echo unknown) -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` +/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) +/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` +hostinfo = $( (hostinfo) 2>/dev/null) +/bin/universe = $( (/bin/universe) 2>/dev/null) +/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) +/bin/arch = $( (/bin/arch) 2>/dev/null) +/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) +/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" EOF +fi exit 1 # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff --git a/build-aux/config.sub b/build-aux/config.sub index dd2ca93..c874b7a 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2020 Free Software Foundation, Inc. -timestamp='2016-11-04' +timestamp='2020-11-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2016-11-04' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -33,7 +33,7 @@ timestamp='2016-11-04' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -50,14 +50,14 @@ timestamp='2016-11-04' # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. 
-Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -67,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2020 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -89,12 +89,12 @@ while test $# -gt 0 ; do - ) # Use stdin as input. break ;; -* ) - echo "$me: invalid option $1$help" + echo "$me: invalid option $1$help" >&2 exit 1 ;; *local*) # First pass through any local machine types. - echo $1 + echo "$1" exit ;; * ) @@ -110,1244 +110,1167 @@ case $# in exit 1;; esac -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | cloudabi*-eabi* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - android-linux) - os=-linux-android - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac +# Split fields of configuration type +# shellcheck disable=SC2162 +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ + | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac ;; - -psos*) - os=-psos + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac ;; - 
-mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint + *) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + 
basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; + nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy + ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac ;; esac -# Decode aliases for certain CPU-COMPANY combinations. 
+# Decode 1-component or ad-hoc basic machines case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. - 1750a | 580 \ - | a29k \ - | aarch64 | aarch64_be \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arceb \ - | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ - | avr | avr32 \ - | ba \ - | be32 | be64 \ - | bfin \ - | c4x | c8051 | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | e2k | epiphany \ - | fido | fr30 | frv | ft32 \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | hexagon \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | k1om \ - | le32 | le64 \ - | lm32 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r6 | mipsisa32r6el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r6 | mipsisa64r6el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | moxie \ - | mt \ - | msp430 \ - | nds32 | nds32le | nds32be \ - | nios | nios2 | nios2eb | nios2el \ - | ns16k | ns32k \ - | open8 | or1k | or1knd | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pru \ - | pyramid \ - | riscv32 | riscv64 \ - | rl78 | rx \ - | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ - | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ - | spu \ - | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ - | ubicom32 \ - | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | visium \ - | we32k \ - | x86 | xc16x | xstormy16 | xtensa \ - | z8k | z80) - basic_machine=$basic_machine-unknown - ;; - c54x) - basic_machine=tic54x-unknown - ;; - c55x) - basic_machine=tic55x-unknown - ;; - c6x) - basic_machine=tic6x-unknown - ;; - leon|leon[3-9]) - basic_machine=sparc-$basic_machine - ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) - basic_machine=$basic_machine-unknown - os=-none + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + cpu=hppa1.1 + vendor=winbond ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + op50n) + cpu=hppa1.1 + vendor=oki ;; - ms1) - basic_machine=mt-unknown + op60c) + cpu=hppa1.1 + vendor=oki ;; - - strongarm | thumb | xscale) - basic_machine=arm-unknown + ibm*) + cpu=i370 + vendor=ibm ;; - xgate) - basic_machine=$basic_machine-unknown - os=-none + orion105) + cpu=clipper + vendor=highlevel ;; - xscaleeb) - basic_machine=armeb-unknown + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple ;; - - xscaleel) - basic_machine=armel-unknown + pmac | pmac-mpw) + cpu=powerpc + vendor=apple ;; - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. - 580-* \ - | a29k-* \ - | aarch64-* | aarch64_be-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | ba-* \ - | be32-* | be64-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | c8051-* | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | e2k-* | elxsi-* \ - | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | k1om-* \ - | le32-* | le64-* \ - | lm32-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ - | microblaze-* | microblazeel-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64octeon-* | mips64octeonel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64r5900-* | mips64r5900el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa32r6-* | mipsisa32r6el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64r6-* | mipsisa64r6el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipsr5900-* | mipsr5900el-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* | nios2eb-* | nios2el-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | open8-* \ - | or1k*-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pru-* \ - | pyramid-* \ - | riscv32-* | riscv64-* \ - | rl78-* | romp-* | rs6000-* | rx-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ - | tahoe-* \ - | tic30-* | 
tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tile*-* \ - | tron-* \ - | ubicom32-* \ - | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ - | vax-* \ - | visium-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* \ - | xstormy16-* | xtensa*-* \ - | ymp-* \ - | z8k-* | z80-*) - ;; - # Recognize the basic CPU types without company name, with glob match. - xtensa*) - basic_machine=$basic_machine-unknown - ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. - 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att + cpu=m68000 + vendor=att ;; 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aros) - basic_machine=i386-pc - os=-aros - ;; - asmjs) - basic_machine=asmjs-unknown - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - blackfin) - basic_machine=bfin-unknown - os=-linux - ;; - blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux + cpu=we32k + vendor=att ;; bluegene*) - basic_machine=powerpc-ibm - os=-cnk - ;; - c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - cegcc) - basic_machine=arm-unknown - os=-cegcc - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16 | cr16-*) - basic_machine=cr16-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec + cpu=powerpc + vendor=ibm + basic_os=cnk ;; decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 + cpu=pdp10 + vendor=dec + basic_os=tops10 ;; decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 + cpu=pdp10 + vendor=dec + basic_os=tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - 
basic_machine=m88k-motorola - os=-sysv3 - ;; - dicos) - basic_machine=i686-pc - os=-dicos - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp + cpu=m68k + vendor=motorola ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - e500v[12]) - basic_machine=powerpc-unknown - os=$os"spe" - ;; - e500v[12]-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - os=$os"spe" - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 ;; encore | umax | mmax) - basic_machine=ns32k-encore + cpu=ns32k + vendor=encore ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} ;; fx2800) - basic_machine=i860-alliant + cpu=i860 + vendor=alliant ;; genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 + cpu=ns32k + vendor=ns ;; h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp + cpu=hppa1.0 + vendor=hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp + cpu=m68000 + vendor=hp ;; hp9k3[2-9][0-9]) - basic_machine=m68k-hp + cpu=m68k + vendor=hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp + cpu=hppa1.0 + vendor=hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp + cpu=hppa1.1 + vendor=hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm + cpu=hppa1.0 + vendor=hp ;; i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv32 ;; i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv4 ;; i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv ;; i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=solaris2 ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} ;; iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) + cpu=mips + vendor=sgi + case $basic_os in + irix*) ;; *) - os=-irix4 
+ basic_os=irix4 ;; esac ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - leon-*|leon[3-9]-*) - basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` - ;; - m68knommu) - basic_machine=m68k-unknown - os=-linux - ;; - m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - microblaze*) - basic_machine=microblaze-xilinx - ;; - mingw64) - basic_machine=x86_64-pc - os=-mingw64 - ;; - mingw32) - basic_machine=i686-pc - os=-mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - os=-mingw32ce - ;; miniframe) - basic_machine=m68000-convergent + cpu=m68000 + vendor=convergent ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - moxiebox) - basic_machine=moxie-unknown - os=-moxiebox - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - msys) - basic_machine=i686-pc - os=-msys - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - nacl) - basic_machine=le32-unknown - os=-nacl - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint ;; news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) ;; - -ns2*) - os=-nextstep2 + ns2*) + basic_os=nextstep2 ;; *) - os=-nextstep3 + basic_os=nextstep3 ;; esac ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; np1) - basic_machine=np1-gould - ;; - neo-tandem) - basic_machine=neo-tandem - ;; - nse-tandem) - basic_machine=nse-tandem - ;; - nsr-tandem) - basic_machine=nsr-tandem + cpu=np1 + vendor=gould ;; op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k + cpu=hppa1.1 + vendor=oki + basic_os=proelf ;; pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - parisc) - basic_machine=hppa-unknown - os=-linux - ;; - parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 ;; pbd) - basic_machine=sparc-tti + cpu=sparc + vendor=tti ;; pbb) - 
basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pc98) - basic_machine=i386-pc - ;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + cpu=m68k + vendor=tti ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + pc532) + cpu=ns32k + vendor=pc532 ;; pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc | ppcbe) basic_machine=powerpc-unknown - ;; - ppc-* | ppcbe-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little) - basic_machine=powerpc64le-unknown + cpu=pn + vendor=gould ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + power) + cpu=power + vendor=ibm ;; ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - os=-rdos - ;; - rdos32) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff + cpu=i386 + vendor=ibm ;; rm[46]00) - basic_machine=mips-siemens + cpu=mips + vendor=siemens ;; rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm + cpu=romp + vendor=ibm ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks ;; - sde) - basic_machine=mipsisa32-sde - os=-elf + tower | tower-32) + cpu=m68k + vendor=ncr ;; - sei) - basic_machine=mips-sei - os=-seiux + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu ;; - sequent) - basic_machine=i386-sequent + w65) + cpu=w65 + vendor=wdc ;; - sh) - basic_machine=sh-hitachi - os=-hms + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf ;; - sh5el) - basic_machine=sh5le-unknown + none) + cpu=none + vendor=none ;; - sh64) - basic_machine=sh64-unknown + leon|leon[3-9]) + cpu=sparc + vendor=$basic_machine ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks + leon-*|leon[3-9]-*) + cpu=sparc + vendor=$(echo "$basic_machine" | sed 's/-.*//') ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 + + *-*) + # shellcheck disable=SC2162 + IFS="-" read cpu vendor <&2 - exit 1 + # Recognize the canonical CPU types that are allowed with any + # company name. 
+ case $cpu in + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \ + | alphapca5[67] | alpha64pca5[67] \ + | am33_2.0 \ + | amdgcn \ + | arc | arceb \ + | arm | arm[lb]e | arme[lb] | armv* \ + | avr | avr32 \ + | asmjs \ + | ba \ + | be32 | be64 \ + | bfin | bpf | bs2000 \ + | c[123]* | c30 | [cjt]90 | c4x \ + | c8051 | clipper | craynv | csky | cydra \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | elxsi | epiphany \ + | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \ + | h8300 | h8500 \ + | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i*86 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle \ + | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ + | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ + | m88110 | m88k | maxq | mb | mcore | mep | metag \ + | microblaze | microblazeel \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64eb | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mmix \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nfp \ + | nios | nios2 | nios2eb | nios2el \ + | none | np1 | ns16k | ns32k | nvptx \ + | open8 \ + | or1k* \ + | or32 \ + | orion \ + | picochip \ + | pdp10 | pdp11 | pj | pjl | pn | power \ + | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \ + | pru \ + | pyramid \ + | riscv | riscv32 | riscv64 \ + | rl78 | romp | rs6000 | rx \ + | s390 | s390x \ + | score \ + | sh | shl \ + | sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \ + | sh[1234]e[lb] | sh[12345][lb]e | sh[23]ele | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \ + | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ + | spu \ + | tahoe \ + | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ + | tron \ + | ubicom32 \ + | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \ + | vax \ + | visium \ + | w65 \ + | wasm32 | wasm64 \ + | we32k \ + | x86 | x86_64 | xc16x | xgate | xps100 \ + | xstormy16 | xtensa* \ + | ymp \ + | z8k | z80) + ;; + + *) + echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2 + exit 1 + ;; + esac ;; esac # Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` +case $vendor in + digital*) + vendor=dec ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + commodore*) + vendor=cbm ;; *) ;; @@ -1355,203 +1278,213 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if [ x"$os" != x"" ] +if test x$basic_os != x then -case $os in - # First match some system type aliases - # that might get confused with valid system types. 
- # -solaris* is a basic system type, with this one exception. - -auroraux) - os=-auroraux + +# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# set os. +case $basic_os in + gnu/linux*) + kernel=linux + os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') + ;; + os2-emx) + kernel=os2 + os=$(echo $basic_os | sed -e 's|os2-emx|emx|') + ;; + nto-qnx*) + kernel=nto + os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') + ;; + *-*) + # shellcheck disable=SC2162 + IFS="-" read kernel os <&2 - exit 1 + # No normalization, but not necessarily accepted, that comes below. ;; esac + else # Here we handle the default operating systems that come with various machines. @@ -1564,261 +1497,356 @@ else # will signal an error saying that MANUFACTURER isn't an operating # system, and we'll never get to this point. -case $basic_machine in +kernel= +case $cpu-$vendor in score-*) - os=-elf + os=elf ;; spu-*) - os=-elf + os=elf ;; *-acorn) - os=-riscix1.2 + os=riscix1.2 ;; arm*-rebel) - os=-linux + kernel=linux + os=gnu ;; arm*-semi) - os=-aout + os=aout ;; c4x-* | tic4x-*) - os=-coff + os=coff ;; c8051-*) - os=-elf + os=elf + ;; + clipper-intergraph) + os=clix ;; hexagon-*) - os=-elf + os=elf ;; tic54x-*) - os=-coff + os=coff ;; tic55x-*) - os=-coff + os=coff ;; tic6x-*) - os=-coff + os=coff ;; # This must come before the *-dec entry. pdp10-*) - os=-tops20 + os=tops20 ;; pdp11-*) - os=-none + os=none ;; *-dec | vax-*) - os=-ultrix4.2 + os=ultrix4.2 ;; m68*-apollo) - os=-domain + os=domain ;; i386-sun) - os=-sunos4.0.2 + os=sunos4.0.2 ;; m68000-sun) - os=-sunos3 + os=sunos3 ;; m68*-cisco) - os=-aout + os=aout ;; mep-*) - os=-elf + os=elf ;; mips*-cisco) - os=-elf + os=elf ;; mips*-*) - os=-elf + os=elf ;; or32-*) - os=-coff + os=coff ;; *-tti) # must be before sparc entry or we get the wrong os. - os=-sysv3 + os=sysv3 ;; sparc-* | *-sun) - os=-sunos4.1.1 + os=sunos4.1.1 ;; - *-be) - os=-beos + pru-*) + os=elf ;; - *-haiku) - os=-haiku + *-be) + os=beos ;; *-ibm) - os=-aix + os=aix ;; *-knuth) - os=-mmixware + os=mmixware ;; *-wec) - os=-proelf + os=proelf ;; *-winbond) - os=-proelf + os=proelf ;; *-oki) - os=-proelf + os=proelf ;; *-hp) - os=-hpux + os=hpux ;; *-hitachi) - os=-hiux + os=hiux ;; i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv + os=sysv ;; *-cbm) - os=-amigaos + os=amigaos ;; *-dg) - os=-dgux + os=dgux ;; *-dolphin) - os=-sysv3 + os=sysv3 ;; m68k-ccur) - os=-rtu + os=rtu ;; m88k-omron*) - os=-luna + os=luna ;; - *-next ) - os=-nextstep + *-next) + os=nextstep ;; *-sequent) - os=-ptx + os=ptx ;; *-crds) - os=-unos + os=unos ;; *-ns) - os=-genix + os=genix ;; i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 + os=mvs ;; *-gould) - os=-sysv + os=sysv ;; *-highlevel) - os=-bsd + os=bsd ;; *-encore) - os=-bsd + os=bsd ;; *-sgi) - os=-irix + os=irix ;; *-siemens) - os=-sysv4 + os=sysv4 ;; *-masscomp) - os=-rtu + os=rtu ;; f30[01]-fujitsu | f700-fujitsu) - os=-uxpv + os=uxpv ;; *-rom68k) - os=-coff + os=coff ;; *-*bug) - os=-coff + os=coff ;; *-apple) - os=-macos + os=macos ;; *-atari*) - os=-mint + os=mint + ;; + *-wrs) + os=vxworks ;; *) - os=-none + os=none ;; esac + fi +# Now, validate our (potentially fixed-up) OS. +case $os in + # Sometimes we do "kernel-abi", so those need to count as OSes. + musl* | newlib* | uclibc*) + ;; + # Likewise for "kernel-libc" + eabi | eabihf | gnueabi | gnueabihf) + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. 
+ gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \ + | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \ + | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ + | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \ + | hiux* | abug | nacl* | netware* | windows* \ + | os9* | macos* | osx* | ios* \ + | mpw* | magic* | mmixware* | mon960* | lnews* \ + | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ + | aos* | aros* | cloudabi* | sortix* | twizzler* \ + | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \ + | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \ + | mirbsd* | netbsd* | dicos* | openedition* | ose* \ + | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \ + | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \ + | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ + | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | udi* | lites* | ieee* | go32* | aux* | hcos* \ + | chorusrdb* | cegcc* | glidix* \ + | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | midipix* | mingw32* | mingw64* | mint* \ + | uxpv* | beos* | mpeix* | udk* | moxiebox* \ + | interix* | uwin* | mks* | rhapsody* | darwin* \ + | openstep* | oskit* | conix* | pw32* | nonstopux* \ + | storm-chaos* | tops10* | tenex* | tops20* | its* \ + | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \ + | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \ + | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \ + | skyos* | haiku* | rdos* | toppers* | drops* | es* \ + | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ + | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*) + ;; + # This one is extra strict with allowed versions + sco3.2v2 | sco3.2v[4-9]* | sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + ;; + none) + ;; + *) + echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) +case $vendor in + unknown) + case $cpu-$os in + *-riscix*) vendor=acorn ;; - -sunos*) + *-sunos*) vendor=sun ;; - -cnk*|-aix*) + *-cnk* | *-aix*) vendor=ibm ;; - -beos*) + *-beos*) vendor=be ;; - -hpux*) + *-hpux*) vendor=hp ;; - -mpeix*) + *-mpeix*) vendor=hp ;; - -hiux*) + *-hiux*) vendor=hitachi ;; - -unos*) + *-unos*) vendor=crds ;; - -dgux*) + *-dgux*) vendor=dg ;; - -luna*) + *-luna*) vendor=omron ;; - -genix*) + *-genix*) vendor=ns ;; - -mvs* | -opened*) + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) + vendor=ibm + ;; + *-os400*) vendor=ibm ;; - -os400*) + s390-* | s390x-*) vendor=ibm ;; - -ptx*) + *-ptx*) vendor=sequent ;; - -tpf*) + *-tpf*) vendor=ibm ;; - -vxsim* | -vxworks* | -windiss*) + *-vxsim* | *-vxworks* | *-windiss*) vendor=wrs ;; - -aux*) + *-aux*) vendor=apple ;; - -hms*) + *-hms*) vendor=hitachi ;; - -mpw* | -macos*) + *-mpw* | *-macos*) vendor=apple ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) vendor=atari ;; - -vos*) + *-vos*) vendor=stratus ;; esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac -echo $basic_machine$os +echo "$cpu-$vendor-${kernel:+$kernel-}$os" exit # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" -- cgit v0.12 From 95f0a77fdef6573dc581cc92279f6d9acefa3ebf Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 2 Nov 2020 20:29:48 +0000 Subject: Detect pthread_getname_np explicitly. At least one libc (musl) defines pthread_setname_np without defining pthread_getname_np. Detect the presence of each individually, rather than inferring both must be defined if set is. --- configure.ac | 31 ++++++++++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 6 +++++ src/prof_sys.c | 5 +++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1e6de8a..eeceb12 100644 --- a/configure.ac +++ b/configure.ac @@ -1745,6 +1745,37 @@ dnl Check if we have dlsym support. 
if test "x${je_cv_pthread_setname_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ]) fi + dnl Check if pthread_getname_np is not necessarily present despite + dnl the pthread_setname_np counterpart + JE_COMPILABLE([pthread_getname_np(3)], [ +#include +#include +], [ + { + char *name = malloc(16); + pthread_getname_np(pthread_self(), name, 16); + free(name); + } +], [je_cv_pthread_getname_np]) + if test "x${je_cv_pthread_getname_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ]) + fi + dnl Check if pthread_get_name_np is not necessarily present despite + dnl the pthread_set_name_np counterpart + JE_COMPILABLE([pthread_get_name_np(3)], [ +#include +#include +#include +], [ + { + char *name = malloc(16); + pthread_get_name_np(pthread_self(), name, 16); + free(name); + } +], [je_cv_pthread_get_name_np]) + if test "x${je_cv_pthread_get_name_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ]) + fi fi JE_APPEND_VS(CPPFLAGS, -D_REENTRANT) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5ea1a19..bcc3559 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -85,6 +85,12 @@ /* Defined if pthread_setname_np(3) is available. */ #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ diff --git a/src/prof_sys.c b/src/prof_sys.c index 777ef1d..87cd2b2 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -292,8 +292,11 @@ void prof_unwind_init() { static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +#if defined(JEMALLOC_HAVE_PTHREAD_GETNAME_NP) return pthread_getname_np(pthread_self(), buf, limit); +#elif defined(JEMALLOC_HAVE_PTHREAD_GET_NAME_NP) + pthread_get_name_np(pthread_self(), buf, limit); + return 0; #else return ENOSYS; #endif -- cgit v0.12 From b4c37a6e81ef2e0286b66a0bc9fc09060690c9a5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Nov 2020 16:00:52 -0800 Subject: Rename edata_tree_t -> edata_avail_t. This isn't a tree any more, and it mildly irritates me any time I see it. --- include/jemalloc/internal/edata.h | 4 ++-- include/jemalloc/internal/edata_cache.h | 2 +- src/edata.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 632c6c3..5ec12be 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -69,7 +69,7 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ typedef struct edata_s edata_t; -typedef ph(edata_t) edata_tree_t; +typedef ph(edata_t) edata_avail_t; typedef ph(edata_t) edata_heap_t; typedef ph(edata_t) edata_age_heap_t; struct edata_s { @@ -723,7 +723,7 @@ edata_age_comp(const edata_t *a, const edata_t *b) { return edata_snad_comp(a, b); } -ph_proto(, edata_avail_, edata_tree_t, edata_t) +ph_proto(, edata_avail_, edata_avail_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index f7d0c31..9a54df0 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -21,7 +21,7 @@ typedef struct edata_cache_s edata_cache_t; struct edata_cache_s { - edata_tree_t avail; + edata_avail_t avail; atomic_zu_t count; malloc_mutex_t mtx; base_t *base; diff --git a/src/edata.c b/src/edata.c index 214e993..a659731 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, +ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) -- cgit v0.12 From 4ca3d91e96c316d3baf67ce4846c164819e2697c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 6 Nov 2020 14:38:17 -0800 Subject: Rename geom_grow -> exp_grow. This was promised in the review of the introduction of geom_grow, but would have been painful to do there because of the series that introduced it. Now that those are comitted, renaming is easier. --- Makefile.in | 2 +- include/jemalloc/internal/exp_grow.h | 50 ++++++++++++++++++++++ include/jemalloc/internal/geom_grow.h | 50 ---------------------- include/jemalloc/internal/hpa.h | 4 +- include/jemalloc/internal/pac.h | 4 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/exp_grow.c | 8 ++++ src/extent.c | 8 ++-- src/geom_grow.c | 8 ---- src/hpa.c | 6 +-- src/pac.c | 6 +-- test/unit/retained.c | 2 +- 15 files changed, 82 insertions(+), 74 deletions(-) create mode 100644 include/jemalloc/internal/exp_grow.h delete mode 100644 include/jemalloc/internal/geom_grow.h create mode 100644 src/exp_grow.c delete mode 100644 src/geom_grow.c diff --git a/Makefile.in b/Makefile.in index 34df239..ca9b17b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -114,10 +114,10 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/emap.c \ $(srcroot)src/eset.c \ + $(srcroot)src/exp_grow.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ - $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h new file mode 100644 index 0000000..8566b8a --- /dev/null +++ b/include/jemalloc/internal/exp_grow.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_EXP_GROW_H +#define JEMALLOC_INTERNAL_EXP_GROW_H + +typedef struct exp_grow_s exp_grow_t; +struct exp_grow_s { + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). 
This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + */ + pszind_t next; + pszind_t limit; +}; + +static inline bool +exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min, + size_t *r_alloc_size, pszind_t *r_skip) { + *r_skip = 0; + *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); + while (*r_alloc_size < alloc_size_min) { + (*r_skip)++; + if (exp_grow->next + *r_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. */ + return true; + } + *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); + } + return false; +} + +static inline void +exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) { + if (exp_grow->next + skip + 1 <= exp_grow->limit) { + exp_grow->next += skip + 1; + } else { + exp_grow->next = exp_grow->limit; + } + +} + +void exp_grow_init(exp_grow_t *exp_grow); + +#endif /* JEMALLOC_INTERNAL_EXP_GROW_H */ diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h deleted file mode 100644 index ba83386..0000000 --- a/include/jemalloc/internal/geom_grow.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ECACHE_GROW_H -#define JEMALLOC_INTERNAL_ECACHE_GROW_H - -typedef struct geom_grow_s geom_grow_t; -struct geom_grow_s { - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. - */ - pszind_t next; - pszind_t limit; -}; - -static inline bool -geom_grow_size_prepare(geom_grow_t *geom_grow, size_t alloc_size_min, - size_t *r_alloc_size, pszind_t *r_skip) { - *r_skip = 0; - *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); - while (*r_alloc_size < alloc_size_min) { - (*r_skip)++; - if (geom_grow->next + *r_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. */ - return true; - } - *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); - } - return false; -} - -static inline void -geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { - if (geom_grow->next + skip + 1 <= geom_grow->limit) { - geom_grow->next += skip + 1; - } else { - geom_grow->next = geom_grow->limit; - } - -} - -void geom_grow_init(geom_grow_t *geom_grow); - -#endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1cef6e5..159f0d0 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPA_H #define JEMALLOC_INTERNAL_HPA_H -#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -29,7 +29,7 @@ struct hpa_s { * small finite number of allocations from it. */ edata_cache_t *edata_cache; - geom_grow_t geom_grow; + exp_grow_t exp_grow; }; /* Used only by CTL; not actually stored here (i.e., all derived). 
*/ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 614d34a..b998b69 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H -#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/pai.h" @@ -95,7 +95,7 @@ struct pac_s { edata_cache_t *edata_cache; /* The grow info for the retained ecache. */ - geom_grow_t geom_grow; + exp_grow_t exp_grow; malloc_mutex_t grow_mtx; /* diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f14f87f..2d6b4b6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 689a520..e3b7e0c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 30c6b29..33d87a4 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 689a520..e3b7e0c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + Source Files diff --git a/src/exp_grow.c b/src/exp_grow.c new file mode 100644 index 0000000..386471f --- /dev/null +++ b/src/exp_grow.c @@ -0,0 +1,8 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +void +exp_grow_init(exp_grow_t *exp_grow) { + exp_grow->next = sz_psz2ind(HUGEPAGE); + exp_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); +} diff --git a/src/extent.c b/src/extent.c index e9c76eb..c7dcc2e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -626,9 +626,9 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * satisfy this request. */ size_t alloc_size; - pszind_t geom_grow_skip; - bool err = geom_grow_size_prepare(&pac->geom_grow, alloc_size_min, - &alloc_size, &geom_grow_skip); + pszind_t exp_grow_skip; + bool err = exp_grow_size_prepare(&pac->exp_grow, alloc_size_min, + &alloc_size, &exp_grow_skip); if (err) { goto label_err; } @@ -724,7 +724,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * range. */ /* All opportunities for failure are past. 
*/ - geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); + exp_grow_size_commit(&pac->exp_grow, exp_grow_skip); malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { diff --git a/src/geom_grow.c b/src/geom_grow.c deleted file mode 100644 index 4816bb7..0000000 --- a/src/geom_grow.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -void -geom_grow_init(geom_grow_t *geom_grow) { - geom_grow->next = sz_psz2ind(HUGEPAGE); - geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); -} diff --git a/src/hpa.c b/src/hpa.c index b329dbb..8029e0b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -43,7 +43,7 @@ hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { hpa->ind = base_ind_get(base); hpa->edata_cache = edata_cache; - geom_grow_init(&hpa->geom_grow); + exp_grow_init(&hpa->exp_grow); return false; } @@ -132,7 +132,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); - err = geom_grow_size_prepare(&hpa->geom_grow, hugepage_goal_min, + err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min, &alloc_size, &skip); if (err) { malloc_mutex_unlock(tsdn, &hpa->grow_mtx); @@ -183,7 +183,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, malloc_mutex_unlock(tsdn, &hpa->mtx); if (!err) { - geom_grow_size_commit(&hpa->geom_grow, skip); + exp_grow_size_commit(&hpa->exp_grow, skip); } malloc_mutex_unlock(tsdn, &hpa->grow_mtx); edata_arena_ind_set(edata, shard->ind); diff --git a/src/pac.c b/src/pac.c index f50e82b..07c9d23 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - geom_grow_init(&pac->geom_grow); + exp_grow_init(&pac->exp_grow); if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; @@ -207,10 +207,10 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, malloc_mutex_lock(tsdn, &pac->grow_mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(pac->geom_grow.limit); + *old_limit = sz_pind2sz(pac->exp_grow.limit); } if (new_limit != NULL) { - pac->geom_grow.limit = new_ind; + pac->exp_grow.limit = new_ind; } malloc_mutex_unlock(tsdn, &pac->grow_mtx); diff --git a/test/unit/retained.c b/test/unit/retained.c index 80ee8cd..9ad9940 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -143,7 +143,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.geom_grow.next; pind++) { + arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; -- cgit v0.12 From cf2549a149dc27eefef1101500cd9ee743e477a0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Nov 2020 13:34:43 -0800 Subject: Add a per-arena oversize_threshold. This can let manual arenas trade off memory and CPU the way auto arenas do. 
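Usage note (editorial, not part of the patch): the knob introduced here is exposed through the "arena.<i>.oversize_threshold" mallctl added in ctl.c below, taking a size_t that is read and written atomically. The following is a minimal sketch of how an application might tune it for a manually created arena; the 4 MiB value, the helper name, and the error handling are illustrative assumptions, not anything prescribed by this commit.

#include <stdio.h>
#include <string.h>
#include <jemalloc/jemalloc.h>

/*
 * Sketch only: create a manual arena and raise its oversize_threshold to
 * 4 MiB, so that freed extents below that size are kept dirty instead of
 * being eagerly purged.  The threshold value is an arbitrary example.
 */
static int
set_oversize_threshold(void) {
	unsigned arena;
	size_t sz = sizeof(arena);
	if (mallctl("arenas.create", &arena, &sz, NULL, 0) != 0) {
		return -1;
	}

	/* Write the new threshold for this arena. */
	size_t threshold = 4 * 1024 * 1024;
	char ctl[64];
	snprintf(ctl, sizeof(ctl), "arena.%u.oversize_threshold", arena);
	if (mallctl(ctl, NULL, NULL, &threshold, sizeof(threshold)) != 0) {
		return -1;
	}

	/* Read it back to confirm the setting took effect. */
	size_t cur;
	size_t cur_sz = sizeof(cur);
	if (mallctl(ctl, &cur, &cur_sz, NULL, 0) != 0) {
		return -1;
	}
	printf("arena %u oversize_threshold = %zu\n", arena, cur);
	return 0;
}

Because the value is stored in an atomic and loaded with relaxed ordering on the deallocation path (see the extent.c hunk below), a write via this mallctl simply takes effect for subsequent frees without any extra synchronization.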
--- Makefile.in | 1 + include/jemalloc/internal/pa.h | 3 +- include/jemalloc/internal/pac.h | 8 ++- src/arena.c | 2 +- src/ctl.c | 38 ++++++++++++ src/extent.c | 5 +- src/pa.c | 7 ++- src/pac.c | 7 ++- test/unit/oversize_threshold.c | 131 ++++++++++++++++++++++++++++++++++++++++ test/unit/pa.c | 4 +- 10 files changed, 194 insertions(+), 12 deletions(-) create mode 100644 test/unit/oversize_threshold.c diff --git a/Makefile.in b/Makefile.in index ca9b17b..03dbbdf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -229,6 +229,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/oversize_threshold.c \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5e97d0b..f1823e6 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -123,7 +123,8 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); + nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms); /* * This isn't exposed to users; we allow late enablement of the HPA shard so diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index b998b69..6d4dfba 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -98,6 +98,9 @@ struct pac_s { exp_grow_t exp_grow; malloc_mutex_t grow_mtx; + /* How large extents should be before getting auto-purged. */ + atomic_zu_t oversize_threshold; + /* * Decay-based purging state, responsible for scheduling extent state * transitions. 
@@ -115,8 +118,9 @@ struct pac_s { }; bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); + edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, + malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, diff --git a/src/arena.c b/src/arena.c index 360827e..7099713 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1500,7 +1500,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_init_update(&cur_time); if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), - &cur_time, arena_dirty_decay_ms_default_get(), + &cur_time, oversize_threshold, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index d5dd1d1..4bb422a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -151,6 +151,7 @@ CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_destroy) CTL_PROTO(arena_i_dss) +CTL_PROTO(arena_i_oversize_threshold) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) @@ -431,6 +432,11 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("reset"), CTL(arena_i_reset)}, {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, + /* + * Undocumented for now, since we anticipate an arena API in flux after + * we cut the last 5-series release. 
+ */ + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, @@ -2531,6 +2537,38 @@ label_return: } static int +arena_i_oversize_threshold_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + unsigned arena_ind; + MIB_UNSIGNED(arena_ind, 1); + + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = atomic_load_zu( + &arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); + READ(oldval, size_t); + } + if (newp != NULL) { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + atomic_store_zu(&arena->pa_shard.pac.oversize_threshold, + *(size_t *)newp, ATOMIC_RELAXED); + } + ret = 0; +label_return: + return ret; +} + +static int arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; diff --git a/src/extent.c b/src/extent.c index c7dcc2e..378bc73 100644 --- a/src/extent.c +++ b/src/extent.c @@ -983,8 +983,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); - if (edata_size_get(edata) >= oversize_threshold && - extent_may_force_decay(pac)) { + if (edata_size_get(edata) >= + atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); extent_maximally_purge(tsdn, pac, ehooks, edata); diff --git a/src/pa.c b/src/pa.c index aee7bcd..e5fcbb7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -17,7 +17,8 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -25,8 +26,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, - stats_mtx)) { + cur_time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms, + &stats->pac_stats, stats_mtx)) { return true; } diff --git a/src/pac.c b/src/pac.c index 07c9d23..8064615 100644 --- a/src/pac.c +++ b/src/pac.c @@ -37,8 +37,9 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { + edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, + malloc_mutex_t *stats_mtx) { unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on @@ -73,6 +74,8 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } + atomic_store_zu(&pac->oversize_threshold, oversize_threshold, + ATOMIC_RELAXED); if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; } diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c new file mode 100644 index 0000000..e374b14 --- /dev/null +++ b/test/unit/oversize_threshold.c @@ -0,0 +1,131 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/ctl.h" + +static void +arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, + size_t *oldlen, void *newp, size_t newlen) { + int err; + char buf[100]; + malloc_snprintf(buf, sizeof(buf), mallctl_str, arena); + + err = mallctl(buf, oldp, oldlen, newp, newlen); + expect_d_eq(0, err, "Mallctl failed; %s", buf); +} + +TEST_BEGIN(test_oversize_threshold_get_set) { + int err; + size_t old_threshold; + size_t new_threshold; + size_t threshold_sz = sizeof(old_threshold); + + unsigned arena; + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + expect_d_eq(0, err, "Arena creation failed"); + + /* Just a write. 
*/ + new_threshold = 1024 * 1024; + arena_mallctl("arena.%u.oversize_threshold", arena, NULL, NULL, + &new_threshold, threshold_sz); + + /* Read and write */ + new_threshold = 2 * 1024 * 1024; + arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, + &threshold_sz, &new_threshold, threshold_sz); + expect_zu_eq(1024 * 1024, old_threshold, "Should have read old value"); + + /* Just a read */ + arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, + &threshold_sz, NULL, 0); + expect_zu_eq(2 * 1024 * 1024, old_threshold, "Should have read old value"); +} +TEST_END + +static size_t max_purged = 0; +static bool +purge_forced_record_max(extent_hooks_t* hooks, void *addr, size_t sz, + size_t offset, size_t length, unsigned arena_ind) { + if (length > max_purged) { + max_purged = length; + } + return false; +} + +static bool +dalloc_record_max(extent_hooks_t *extent_hooks, void *addr, size_t sz, + bool comitted, unsigned arena_ind) { + if (sz > max_purged) { + max_purged = sz; + } + return false; +} + +extent_hooks_t max_recording_extent_hooks; + +TEST_BEGIN(test_oversize_threshold) { + max_recording_extent_hooks = ehooks_default_extent_hooks; + max_recording_extent_hooks.purge_forced = &purge_forced_record_max; + max_recording_extent_hooks.dalloc = &dalloc_record_max; + + extent_hooks_t *extent_hooks = &max_recording_extent_hooks; + + int err; + + unsigned arena; + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + expect_d_eq(0, err, "Arena creation failed"); + arena_mallctl("arena.%u.extent_hooks", arena, NULL, NULL, &extent_hooks, + sizeof(extent_hooks)); + + /* + * This test will fundamentally race with purging, since we're going to + * check the dirty stats to see if our oversized allocation got purged. + * We don't want other purging to happen accidentally. We can't just + * disable purging entirely, though, since that will also disable + * oversize purging. Just set purging intervals to be very large. + */ + ssize_t decay_ms = 100 * 1000; + ssize_t decay_ms_sz = sizeof(decay_ms); + arena_mallctl("arena.%u.dirty_decay_ms", arena, NULL, NULL, &decay_ms, + decay_ms_sz); + arena_mallctl("arena.%u.muzzy_decay_ms", arena, NULL, NULL, &decay_ms, + decay_ms_sz); + + /* Clean everything out. */ + arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); + max_purged = 0; + + /* Set threshold to 1MB. */ + size_t threshold = 1024 * 1024; + size_t threshold_sz = sizeof(threshold); + arena_mallctl("arena.%u.oversize_threshold", arena, NULL, NULL, + &threshold, threshold_sz); + + /* Allocating and freeing half a megabyte should leave them dirty. */ + void *ptr = mallocx(512 * 1024, MALLOCX_ARENA(arena)); + dallocx(ptr, MALLOCX_TCACHE_NONE); + expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + + /* Purge again to reset everything out. */ + arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); + max_purged = 0; + + /* + * Allocating and freeing 2 megabytes should leave them dirty because of + * the oversize threshold. 
+ */ + ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); + dallocx(ptr, MALLOCX_TCACHE_NONE); + expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_oversize_threshold_get_set, + test_oversize_threshold); +} + diff --git a/test/unit/pa.c b/test/unit/pa.c index 3a91023..dacd8e7 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -63,9 +63,11 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); + const size_t oversize_threshold = 8 * 1024 * 1024; err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, - &test_data->stats_mtx, &time, dirty_decay_ms, muzzy_decay_ms); + &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, + muzzy_decay_ms); assert_false(err, ""); return test_data; -- cgit v0.12 From 9545c2cd36e758f41857b93b8cb55355cf0bc508 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Nov 2020 11:28:37 -0800 Subject: Add sample interval to prof last-N dump --- src/prof_recent.c | 3 +++ test/unit/prof_recent.c | 18 +++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index b1aeef3..ff87678 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -540,6 +540,9 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_begin(&emitter); + uint64_t sample_interval = (uint64_t)1U << lg_prof_sample; + emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64, + &sample_interval); emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 180f13f..e16a849 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -370,16 +370,16 @@ typedef struct { #define DUMP_ERROR "Dump output is wrong" static void -confirm_record(const char *template, - const confirm_record_t *records, const size_t n_records) { +confirm_record(const char *template, const confirm_record_t *records, + const size_t n_records) { static const char *types[2] = {"alloc", "dalloc"}; static char buf[64]; /* * The template string would be in the form of: - * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[]}", + * "{...,\"recent_alloc\":[]}", * and dump_out would be in the form of: - * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". + * "{...,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". 
*/ assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, @@ -489,18 +489,22 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { void *p, *q; confirm_record_t records[2]; + assert_zu_eq(lg_prof_sample, (size_t)0, + "lg_prof_sample not set correctly"); + future = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", - DUMP_ERROR); + expect_str_eq(dump_out, "{\"sample_interval\":1," + "\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; + const char *template = "{\"sample_interval\":1," + "\"recent_alloc_max\":2,\"recent_alloc\":[]}"; expect_str_eq(dump_out, template, DUMP_ERROR); p = malloc(7); -- cgit v0.12 From 566c4a8594d433ac40ebfd5a4736a53c431f81dd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 14:44:36 -0700 Subject: Slight changes to cache bin internal functions --- include/jemalloc/internal/cache_bin.h | 44 +++++++++++++++++++++-------------- src/cache_bin.c | 2 +- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 64275f2..551afc8 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -167,16 +167,21 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } -/* Number of items currently cached in the bin. */ +/* Number of items currently cached in the bin, without checking ncached_max. */ static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); - - assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *(bin->stack_head) != NULL); + return n; +} +/* Number of items currently cached in the bin, with checking ncached_max. */ +static inline cache_bin_sz_t +cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); + assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -186,7 +191,7 @@ cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { * A pointer to the position one past the end of the backing array. */ static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; @@ -204,7 +209,7 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == bin->stack_head); + assert(cache_bin_empty_position_get(bin) == bin->stack_head); } /* @@ -213,7 +218,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { * ncached >= low_water during flush). 
*/ static inline cache_bin_sz_t -cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, bin->low_bits_empty) / sizeof(void *); } @@ -221,7 +226,7 @@ cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin, info); + cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); assert(low_water <= cache_bin_info_ncached_max(info)); assert(low_water <= cache_bin_ncached_get(bin, info)); @@ -240,6 +245,14 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } +static inline void +cache_bin_low_water_adjust(cache_bin_t *bin) { + if (cache_bin_ncached_get_internal(bin) + < cache_bin_low_water_get_internal(bin)) { + cache_bin_low_water_set(bin); + } +} + JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* @@ -365,8 +378,8 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - assert(cache_bin_ncached_get(bin, info) == 0); - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + cache_bin_assert_empty(bin, info); + arr->ptr = cache_bin_empty_position_get(bin) - nfill; } /* @@ -377,8 +390,8 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { - assert(cache_bin_ncached_get(bin, info) == 0); - void **empty_position = cache_bin_empty_position_get(bin, info); + cache_bin_assert_empty(bin, info); + void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); @@ -390,7 +403,7 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { - arr->ptr = cache_bin_empty_position_get(bin, info) - 1; + arr->ptr = cache_bin_empty_position_get(bin) - 1; assert(cache_bin_ncached_get(bin, info) == 0 || *arr->ptr != NULL); } @@ -416,10 +429,7 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head = bin->stack_head + nflushed; - if (cache_bin_ncached_get(bin, info) - < cache_bin_low_water_get_internal(bin, info)) { - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; - } + cache_bin_low_water_adjust(bin); } /* diff --git a/src/cache_bin.c b/src/cache_bin.c index 1d04b0d..5f50606 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -84,7 +84,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, assert(cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == empty_position); + assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); } -- 
cgit v0.12 From 4a65f34930fb5e72b2d6ab55d23b5971a5efefbd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 14:56:15 -0700 Subject: Fix a cache bin test --- test/unit/cache_bin.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 43fe8c6..7798bfa 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -186,7 +186,7 @@ TEST_BEGIN(test_cache_bin) { ncached_max / 2); /* Try to fill some, succeed partially. */ do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, - ncached_max / 2); + ncached_max / 4); /* Try to fill some, fail completely. */ do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); @@ -196,6 +196,8 @@ TEST_BEGIN(test_cache_bin) { do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + + free(ptrs); } TEST_END -- cgit v0.12 From be5e49f4fa09247a91557690cdaef42a82a83d6a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 16:07:25 -0700 Subject: Add a batch mode for cache_bin_alloc() --- include/jemalloc/internal/cache_bin.h | 12 ++++++++ test/unit/cache_bin.c | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 551afc8..c1b8fc4 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -317,6 +317,18 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { return cache_bin_alloc_impl(bin, success, true); } +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { + size_t n = cache_bin_ncached_get_internal(bin); + if (n > num) { + n = num; + } + memcpy(out, bin->stack_head, n * sizeof(void *)); + bin->stack_head += n; + cache_bin_low_water_adjust(bin); + return n; +} + /* * Free an object into the given bin. Fails only if the bin is full. */ diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 7798bfa..b31d07d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -52,6 +52,34 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } } +static void +do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, size_t batch) { + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); + cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); + for (cache_bin_sz_t i = 0; i < nfill; i++) { + arr.ptr[i] = &ptrs[i]; + } + cache_bin_finish_fill(bin, info, &arr, nfill); + assert_true(cache_bin_ncached_get(bin, info) == nfill, ""); + cache_bin_low_water_set(bin); + + void **out = malloc((batch + 1) * sizeof(void *)); + size_t n = cache_bin_alloc_batch(bin, batch, out); + assert_true(n == ((size_t)nfill < batch ? 
(size_t)nfill : batch), ""); + for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { + expect_ptr_eq(out[i], &ptrs[i], ""); + } + expect_true(cache_bin_low_water_get(bin, info) == nfill - + (cache_bin_sz_t)n, ""); + while (cache_bin_ncached_get(bin, info) > 0) { + bool success; + cache_bin_alloc(bin, &success); + } + free(out); +} + TEST_BEGIN(test_cache_bin) { const int ncached_max = 100; bool success; @@ -197,6 +225,31 @@ TEST_BEGIN(test_cache_bin) { do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max * 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + ncached_max / 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + ncached_max / 4); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, 2, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, 2, 2); + do_batch_alloc_test(&bin, &info, ptrs, 2, 1); + do_batch_alloc_test(&bin, &info, ptrs, 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, 1, 2); + do_batch_alloc_test(&bin, &info, ptrs, 1, 1); + do_batch_alloc_test(&bin, &info, ptrs, 1, 0); + do_batch_alloc_test(&bin, &info, ptrs, 0, 2); + do_batch_alloc_test(&bin, &info, ptrs, 0, 1); + do_batch_alloc_test(&bin, &info, ptrs, 0, 0); + free(ptrs); } TEST_END -- cgit v0.12 From ac480136d76010243f50997a1c1231a5572548aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 20 Oct 2020 11:00:09 -0700 Subject: Split out locality checking in batch allocation tests --- test/unit/batch_alloc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 08d6f66..cb46513 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -35,20 +35,28 @@ verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, } static void -verify_batch(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero, - arena_t *arena, unsigned nregs) { - for (size_t i = 0, j = 0; i < batch; ++i, ++j) { - if (j == nregs) { - j = 0; - } +verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, + bool zero) { + for (size_t i = 0; i < batch; ++i) { void *p = ptrs[i]; expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); - expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); if (zero) { for (size_t k = 0; k < usize; ++k) { expect_true(*((unsigned char *)p + k) == 0, ""); } } + } +} + +static void +verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, + arena_t *arena, unsigned nregs) { + for (size_t i = 0, j = 0; i < batch; ++i, ++j) { + if (j == nregs) { + j = 0; + } + void *p = ptrs[i]; + expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); if (j == 0) { expect_true(PAGE_ALIGNED(p), ""); continue; @@ -154,7 +162,8 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { assert_zu_eq(filled, batch, ""); memcpy(&stats_after, &bin->stats, 
sizeof(bin_stats_t)); verify_stats(&stats_before, &stats_after, batch, nregs); - verify_batch(tsd, ptrs, batch, usize, zero, arena, + verify_batch_basic(tsd, ptrs, batch, usize, zero); + verify_batch_locality(tsd, ptrs, batch, usize, arena, nregs); release_batch(ptrs, batch, usize); } -- cgit v0.12 From d96e4525adaefbde79f349d024eb5f94e72faf50 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 12 Nov 2020 14:54:25 -0800 Subject: Route batch allocation of small batch size to tcache --- src/jemalloc.c | 106 +++++++++++++++++++++++++++++++----------------- test/unit/batch_alloc.c | 76 +++++++++++----------------------- 2 files changed, 91 insertions(+), 91 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2a791e1..575a63c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4088,32 +4088,15 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_done; } - szind_t ind = sz_size2index(usize); - if (unlikely(ind >= SC_NBINS)) { - /* No optimization for large sizes. */ - void *p; - while (filled < num && (p = je_mallocx(size, flags)) != NULL) { - ptrs[filled++] = p; - } - goto label_done; - } - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - unsigned arena_ind = mallocx_arena_get(flags); - arena_t *arena; - if (arena_get_from_ind(tsd, arena_ind, &arena)) { - goto label_done; - } - if (arena == NULL) { - arena = arena_choose(tsd, NULL); - } else { - /* When a manual arena is specified, bypass the tcache. */ - flags |= MALLOCX_TCACHE_NONE; - } - if (unlikely(arena == NULL)) { - goto label_done; + cache_bin_t *bin = NULL; + arena_t *arena = NULL; + size_t nregs = 0; + if (likely(ind < SC_NBINS)) { + nregs = bin_infos[ind].nregs; + assert(nregs > 0); } while (filled < num) { @@ -4132,9 +4115,63 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { batch_alloc_prof_sample_assert(tsd, batch, usize); } - size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, - ind, ptrs + filled, batch, zero); - filled += n; + size_t progress = 0; + + if (likely(ind < SC_NBINS) && batch >= nregs) { + if (arena == NULL) { + unsigned arena_ind = mallocx_arena_get(flags); + if (arena_get_from_ind(tsd, arena_ind, + &arena)) { + goto label_done; + } + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } + if (unlikely(arena == NULL)) { + goto label_done; + } + } + size_t arena_batch = batch - batch % nregs; + size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, + ind, ptrs + filled, arena_batch, zero); + progress += n; + filled += n; + } + + if (likely(ind < nhbins) && progress < batch) { + if (bin == NULL) { + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, + tcache_ind, /* slow */ true, + /* is_alloc */ true); + if (tcache != NULL) { + bin = &tcache->bins[ind]; + } + } + if (bin != NULL) { + size_t bin_batch = batch - progress; + size_t n = cache_bin_alloc_batch(bin, bin_batch, + ptrs + filled); + if (config_stats) { + bin->tstats.nrequests += n; + } + if (zero) { + for (size_t i = 0; i < n; ++i) { + memset(ptrs[filled + i], 0, + usize); + } + } + if (config_prof && opt_prof + && unlikely(ind >= SC_NBINS)) { + for (size_t i = 0; i < n; ++i) { + prof_tctx_reset_sampled(tsd, + ptrs[filled + i]); + } + } + progress += n; + filled += n; + } + } /* * For thread events other than prof sampling, trigger them as @@ -4146,23 +4183,16 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { * were handled individually, but it would do no 
harm (or * even be beneficial) to coalesce the triggerings. */ - thread_alloc_event(tsd, n * usize); - - if (n < batch) { /* OOM */ - break; - } + thread_alloc_event(tsd, progress * usize); - if (prof_sample_event) { - /* - * The next allocation will be prof sampled. The - * thread event logic is handled within the mallocx() - * call. - */ + if (progress < batch || prof_sample_event) { void *p = je_mallocx(size, flags); if (p == NULL) { /* OOM */ break; } - assert(prof_sampled(tsd, p)); + if (progress == batch) { + assert(prof_sampled(tsd, p)); + } ptrs[filled++] = p; } } diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index cb46513..992990f 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -6,35 +6,6 @@ static void *ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) static void -verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, - unsigned nregs) { - if (!config_stats) { - return; - } - if (config_prof && opt_prof) { - /* - * Checking the stats when prof is on is feasible but - * complicated, while checking the non-prof case suffices for - * unit-test purpose. - */ - return; - } - expect_u64_eq(before->nmalloc + batch, after->nmalloc, ""); - expect_u64_eq(before->nrequests + batch, after->nrequests, ""); - expect_zu_eq(before->curregs + batch, after->curregs, ""); - size_t nslab = batch / nregs; - size_t n_nonfull = 0; - if (batch % nregs != 0) { - ++nslab; - ++n_nonfull; - } - expect_u64_eq(before->nslabs + nslab, after->nslabs, ""); - expect_zu_eq(before->curslabs + nslab, after->curslabs, ""); - expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs, - ""); -} - -static void verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) { for (size_t i = 0; i < batch; ++i) { @@ -51,10 +22,21 @@ verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, static void verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, arena_t *arena, unsigned nregs) { + if (config_prof && opt_prof) { + /* + * Checking batch locality when prof is on is feasible but + * complicated, while checking the non-prof case suffices for + * unit-test purpose. 
+ */ + return; + } for (size_t i = 0, j = 0; i < batch; ++i, ++j) { if (j == nregs) { j = 0; } + if (j == 0 && batch - i < nregs) { + break; + } void *p = ptrs[i]; expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); if (j == 0) { @@ -63,21 +45,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, } assert(i > 0); void *q = ptrs[i - 1]; - bool adjacent = (uintptr_t)p > (uintptr_t)q - && (size_t)((uintptr_t)p - (uintptr_t)q) == usize; - if (config_prof && opt_prof) { - if (adjacent) { - expect_false(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - } else { - expect_true(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - expect_true(PAGE_ALIGNED(p), ""); - j = 0; - } - } else { - expect_true(adjacent, ""); - } + expect_true((uintptr_t)p > (uintptr_t)q + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, ""); } } @@ -124,8 +93,6 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { arena = arena_choose(tsd, NULL); } assert(arena != NULL); - bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL); - assert(bin != NULL); int flags = arena_flag; if (alignment != 0) { flags |= MALLOCX_ALIGN(alignment); @@ -155,13 +122,9 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - bin_stats_t stats_before, stats_after; - memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); size_t filled = batch_alloc_wrapper(ptrs, batch, size, flags); assert_zu_eq(filled, batch, ""); - memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); - verify_stats(&stats_before, &stats_after, batch, nregs); verify_batch_basic(tsd, ptrs, batch, usize, zero); verify_batch_locality(tsd, ptrs, batch, usize, arena, nregs); @@ -196,8 +159,15 @@ TEST_BEGIN(test_batch_alloc_manual_arena) { } TEST_END -TEST_BEGIN(test_batch_alloc_fallback) { - const size_t size = SC_LARGE_MINCLASS; +TEST_BEGIN(test_batch_alloc_large) { + size_t size = SC_LARGE_MINCLASS; + for (size_t batch = 0; batch < 4; ++batch) { + assert(batch < BATCH_MAX); + size_t filled = batch_alloc(ptrs, batch, size, 0); + assert_zu_eq(filled, batch, ""); + release_batch(ptrs, batch, size); + } + size = tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); size_t filled = batch_alloc(ptrs, batch, size, 0); @@ -214,5 +184,5 @@ main(void) { test_batch_alloc_zero, test_batch_alloc_aligned, test_batch_alloc_manual_arena, - test_batch_alloc_fallback); + test_batch_alloc_large); } -- cgit v0.12 From 92e189be8b725be1f4de5f476f410173db29bc7d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Nov 2020 17:15:35 -0800 Subject: Add some comments to the batch allocation logic flow --- src/jemalloc.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 575a63c..ebc6669 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4091,8 +4091,13 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { szind_t ind = sz_size2index(usize); bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + /* + * The cache bin and arena will be lazily initialized; it's hard to + * know in advance whether each of them needs to be initialized. 
+ */ cache_bin_t *bin = NULL; arena_t *arena = NULL; + size_t nregs = 0; if (likely(ind < SC_NBINS)) { nregs = bin_infos[ind].nregs; @@ -4148,8 +4153,33 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { bin = &tcache->bins[ind]; } } + /* + * If we don't have a tcache bin, we don't want to + * immediately give up, because there's the possibility + * that the user explicitly requested to bypass the + * tcache, or that the user explicitly turned off the + * tcache; in such cases, we go through the slow path, + * i.e. the mallocx() call at the end of the while loop. + */ if (bin != NULL) { size_t bin_batch = batch - progress; + /* + * n can be less than bin_batch, meaning that + * the cache bin does not have enough memory. + * In such cases, we rely on the slow path, + * i.e. the mallocx() call at the end of the + * while loop, to fill in the cache, and in the + * next iteration of the while loop, the tcache + * will contain a lot of memory, and we can + * harvest them here. Compared to the + * alternative approach where we directly go to + * the arena bins here, the overhead of our + * current approach should usually be minimal, + * since we never try to fetch more memory than + * what a slab contains via the tcache. An + * additional benefit is that the tcache will + * not be empty for the next allocation request. + */ size_t n = cache_bin_alloc_batch(bin, bin_batch, ptrs + filled); if (config_stats) { -- cgit v0.12 From 520b75fa2daf3313d87780f40ca0101c83c10398 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 23 Nov 2020 15:00:38 +0000 Subject: utrace support with label based signature. --- configure.ac | 23 ++++++++++++++++++---- .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/jemalloc_preamble.h.in | 8 +++++++- src/jemalloc.c | 2 +- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index eeceb12..8e21f3f 100644 --- a/configure.ac +++ b/configure.ac @@ -1405,10 +1405,25 @@ JE_COMPILABLE([utrace(2)], [ utrace((void *)0, 0); ], [je_cv_utrace]) if test "x${je_cv_utrace}" = "xno" ; then - enable_utrace="0" -fi -if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE], [ ]) + JE_COMPILABLE([utrace(2) with label], [ + #include + #include + #include + #include + #include + ], [ + utrace((void *)0, (void *)0, 0); + ], [je_cv_utrace_label]) + if test "x${je_cv_utrace_label}" = "xno"; then + enable_utrace="0" + fi + if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ]) + fi +else + if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE], [ ]) + fi fi AC_SUBST([enable_utrace]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index bcc3559..ff0e15b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -173,6 +173,9 @@ /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE +/* Support utrace(2)-based tracing (label based signature). */ +#undef JEMALLOC_UTRACE_LABEL + /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index d62fee0..ef1cbae 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -4,8 +4,14 @@ #include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" -#ifdef JEMALLOC_UTRACE +#if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) #include +# if defined(JEMALLOC_UTRACE) +# define UTRACE_CALL(p, l) utrace(p, l) +# else +# define UTRACE_CALL(p, l) utrace("jemalloc_process", p, l) +# define JEMALLOC_UTRACE +# endif #endif #define JEMALLOC_NO_DEMANGLE diff --git a/src/jemalloc.c b/src/jemalloc.c index ebc6669..1a8db83 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -250,7 +250,7 @@ typedef struct { ut.p = (a); \ ut.s = (b); \ ut.r = (c); \ - utrace(&ut, sizeof(ut)); \ + UTRACE_CALL(&ut, sizeof(ut)); \ errno = utrace_serrno; \ } \ } while (0) -- cgit v0.12 From 99c2d6c232eca19e29224f48425517ecebcc1ab0 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Thu, 19 Nov 2020 16:50:09 +0100 Subject: Backport jeprof --collapse for flamegraph generation --- bin/jeprof.in | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 3ed408c..d47359c 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -205,6 +205,8 @@ Output type: --svg Generate SVG to stdout --gif Generate GIF to stdout --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -332,6 +334,7 @@ sub Init() { $main::opt_gif = 0; $main::opt_svg = 0; $main::opt_raw = 0; + $main::opt_collapsed = 0; $main::opt_nodecount = 80; $main::opt_nodefraction = 0.005; @@ -405,6 +408,7 @@ sub Init() { "svg!" => \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, "interactive!" => \$main::opt_interactive, "nodecount=i" => \$main::opt_nodecount, "nodefraction=f" => \$main::opt_nodefraction, @@ -490,6 +494,7 @@ sub Init() { $main::opt_svg + $main::opt_gif + $main::opt_raw + + $main::opt_collapsed + $main::opt_interactive + 0; if ($modes > 1) { @@ -621,6 +626,8 @@ sub FilterAndPrint { PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); } elsif ($main::opt_callgrind) { PrintCallgrind($calls); } else { @@ -2810,6 +2817,40 @@ sub IsSecondPcAlwaysTheSame { return $second_pc; } +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. 
+ push (@stack, $address); + } + + return @stack; +} + sub ExtractSymbolLocation { my $symbols = shift; my $address = shift; @@ -2884,6 +2925,17 @@ sub FilterFrames { return $result; } +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; -- cgit v0.12 From ecd39418aca14cddcf69acc86c2aa3cbb13a72e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 1 Dec 2020 13:00:57 -0800 Subject: Add fxp: A fixed-point math library. This will be used in the next commit to allow non-integer values for narenas_ratio. --- Makefile.in | 2 + include/jemalloc/internal/fxp.h | 100 ++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/fxp.c | 124 ++++++++ test/unit/fxp.c | 344 +++++++++++++++++++++ 8 files changed, 578 insertions(+) create mode 100644 include/jemalloc/internal/fxp.h create mode 100644 src/fxp.c create mode 100644 test/unit/fxp.c diff --git a/Makefile.in b/Makefile.in index 03dbbdf..eae3065 100644 --- a/Makefile.in +++ b/Makefile.in @@ -118,6 +118,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ + $(srcroot)src/fxp.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ @@ -212,6 +213,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/extent_quantize.c \ ${srcroot}test/unit/flat_bitmap.c \ $(srcroot)test/unit/fork.c \ + ${srcroot}test/unit/fxp.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h new file mode 100644 index 0000000..d943809 --- /dev/null +++ b/include/jemalloc/internal/fxp.h @@ -0,0 +1,100 @@ +#ifndef JEMALLOC_INTERNAL_FXP_H +#define JEMALLOC_INTERNAL_FXP_H + +/* + * A simple fixed-point math implementation, supporting only unsigned values + * (with overflow being an error). + * + * It's not in general safe to use floating point in core code, because various + * libc implementations we get linked against can assume that malloc won't touch + * floating point state and call it with an unusual calling convention. + */ + +/* + * High 16 bits are the integer part, low 16 are the fractional part. Or + * equivalently, repr == 2**16 * val, where we use "val" to refer to the + * (imaginary) fractional representation of the true value. + * + * We pick a uint32_t here since it's convenient in some places to + * double the representation size (i.e. multiplication and division use + * 64-bit integer types), and a uint64_t is the largest type we're + * certain is available. + */ +typedef uint32_t fxp_t; +#define FXP_INIT_INT(x) ((x) << 16) + +/* + * Amount of precision used in parsing and printing numbers. The integer bound + * is simply because the integer part of the number gets 16 bits, and so is + * bounded by 65536. 
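+ * For instance, FXP_INIT_INT(1) is 0x10000, and the all-ones bit pattern
+ * 0xffffffff is the largest representable value, just shy of 65536
+ * (65535 + 65535/65536).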
+ * + * We use a lot of precision for the fractional part, even though most of it + * gets rounded off; this lets us get exact values for the important special + * case where the denominator is a small power of 2 (for instance, + * 1/512 == 0.001953125 is exactly representable even with only 16 bits of + * fractional precision). We need to left-shift by 16 before dividing by + * 10**precision, so we pick precision to be floor(log(2**48)) = 14. + */ +#define FXP_INTEGER_PART_DIGITS 5 +#define FXP_FRACTIONAL_PART_DIGITS 14 + +/* + * In addition to the integer and fractional parts of the number, we need to + * include a null character and (possibly) a decimal point. + */ +#define FXP_BUF_SIZE (FXP_INTEGER_PART_DIGITS + FXP_FRACTIONAL_PART_DIGITS + 2) + +static inline fxp_t +fxp_add(fxp_t a, fxp_t b) { + return a + b; +} + +static inline fxp_t +fxp_sub(fxp_t a, fxp_t b) { + assert(a >= b); + return a - b; +} + +static inline fxp_t +fxp_mul(fxp_t a, fxp_t b) { + uint64_t unshifted = (uint64_t)a * (uint64_t)b; + /* + * Unshifted is (a.val * 2**16) * (b.val * 2**16) + * == (a.val * b.val) * 2**32, but we want + * (a.val * b.val) * 2 ** 16. + */ + return (uint32_t)(unshifted >> 16); +} + +static inline fxp_t +fxp_div(fxp_t a, fxp_t b) { + assert(b != 0); + uint64_t unshifted = ((uint64_t)a << 32) / (uint64_t)b; + /* + * Unshifted is (a.val * 2**16) * (2**32) / (b.val * 2**16) + * == (a.val / b.val) * (2 ** 32), which again corresponds to a right + * shift of 16. + */ + return (uint32_t)(unshifted >> 16); +} + +static inline uint32_t +fxp_round_down(fxp_t a) { + return a >> 16; +} + +static inline uint32_t +fxp_round_nearest(fxp_t a) { + uint32_t fractional_part = (a & ((1U << 16) - 1)); + uint32_t increment = (uint32_t)(fractional_part >= (1U << 15)); + return (a >> 16) + increment; +} + +/* + * Returns true on error. Otherwise, returns false and updates *ptr to point to + * the first character not parsed (because it wasn't a digit). 
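+ *
+ * For instance:
+ *   fxp_t x;
+ *   bool err = fxp_parse(&x, "1.25", NULL);
+ * should leave err == false and x == 0x14000 (i.e. 1.25 * 2**16), the NULL
+ * meaning the caller doesn't care where parsing stopped.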
+ */ +bool fxp_parse(fxp_t *a, const char *ptr, char **end); +void fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]); + +#endif /* JEMALLOC_INTERNAL_FXP_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2d6b4b6..6c4e7fd 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index e3b7e0c..84ff574 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 33d87a4..07fbe21 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index e3b7e0c..84ff574 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/fxp.c b/src/fxp.c new file mode 100644 index 0000000..96585f0 --- /dev/null +++ b/src/fxp.c @@ -0,0 +1,124 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/fxp.h" + +static bool +fxp_isdigit(char c) { + return '0' <= c && c <= '9'; +} + +bool +fxp_parse(fxp_t *result, const char *str, char **end) { + /* + * Using malloc_strtoumax in this method isn't as handy as you might + * expect (I tried). In the fractional part, significant leading zeros + * mean that you still need to do your own parsing, now with trickier + * math. In the integer part, the casting (uintmax_t to uint32_t) + * forces more reasoning about bounds than just checking for overflow as + * we parse. + */ + uint32_t integer_part = 0; + + const char *cur = str; + + /* The string must start with a digit or a decimal point. */ + if (*cur != '.' && !fxp_isdigit(*cur)) { + return true; + } + + while ('0' <= *cur && *cur <= '9') { + integer_part *= 10; + integer_part += *cur - '0'; + if (integer_part >= (1U << 16)) { + return true; + } + cur++; + } + + /* + * We've parsed all digits at the beginning of the string, without + * overflow. Either we're done, or there's a fractional part. + */ + if (*cur != '.') { + *result = (integer_part << 16); + if (end != NULL) { + *end = (char *)cur; + } + return false; + } + + /* There's a fractional part. */ + cur++; + if (!fxp_isdigit(*cur)) { + /* Shouldn't end on the decimal point. */ + return true; + } + + /* + * We use a lot of precision for the fractional part, even though we'll + * discard most of it; this lets us get exact values for the important + * special case where the denominator is a small power of 2 (for + * instance, 1/512 == 0.001953125 is exactly representable even with + * only 16 bits of fractional precision). We need to left-shift by 16 + * before dividing so we pick the number of digits to be + * floor(log(2**48)) = 14. 
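+ *
+ * For example, parsing ".001953125" (i.e. 1/512) finishes the loop below
+ * with fractional_part == 195312500000 and frac_div == 10**14, so the
+ * fixed-point fraction comes out to (195312500000 << 16) / 10**14 == 128,
+ * which is exactly 2**16 / 512.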
+ */ + uint64_t fractional_part = 0; + uint64_t frac_div = 1; + for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { + fractional_part *= 10; + frac_div *= 10; + if (fxp_isdigit(*cur)) { + fractional_part += *cur - '0'; + cur++; + } + } + /* + * We only parse the first maxdigits characters, but we can still ignore + * any digits after that. + */ + while (fxp_isdigit(*cur)) { + cur++; + } + + assert(fractional_part < frac_div); + uint32_t fractional_repr = (uint32_t)( + (fractional_part << 16) / frac_div); + + /* Success! */ + *result = (integer_part << 16) + fractional_repr; + if (end != NULL) { + *end = (char *)cur; + } + return false; +} + +void +fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { + uint32_t integer_part = fxp_round_down(a); + uint32_t fractional_part = (a & ((1U << 16) - 1)); + + int leading_fraction_zeros = 0; + uint64_t fraction_digits = fractional_part; + for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { + if (fraction_digits < (1U << 16) + && fraction_digits * 10 >= (1U << 16)) { + leading_fraction_zeros = i; + } + fraction_digits *= 10; + } + fraction_digits >>= 16; + while (fraction_digits > 0 && fraction_digits % 10 == 0) { + fraction_digits /= 10; + } + + size_t printed = malloc_snprintf(buf, FXP_BUF_SIZE, "%"FMTu32".", + integer_part); + for (int i = 0; i < leading_fraction_zeros; i++) { + buf[printed] = '0'; + printed++; + } + malloc_snprintf(&buf[printed], FXP_BUF_SIZE - printed, "%"FMTu64, + fraction_digits); +} diff --git a/test/unit/fxp.c b/test/unit/fxp.c new file mode 100644 index 0000000..89f0ca6 --- /dev/null +++ b/test/unit/fxp.c @@ -0,0 +1,344 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/fxp.h" + +static double +fxp2double(fxp_t a) { + double intpart = (double)(a >> 16); + double fracpart = (double)(a & ((1U << 16) - 1)) / (1U << 16); + return intpart + fracpart; +} + +/* Is a close to b? */ +static bool +double_close(double a, double b) { + /* + * Our implementation doesn't try for precision. Correspondingly, don't + * enforce it too strenuously here; accept values that are close in + * either relative or absolute terms. + */ + return fabs(a - b) < 0.01 || fabs(a - b) / a < 0.01; +} + +static bool +fxp_close(fxp_t a, fxp_t b) { + return double_close(fxp2double(a), fxp2double(b)); +} + +static fxp_t +xparse_fxp(const char *str) { + fxp_t result; + bool err = fxp_parse(&result, str, NULL); + assert_false(err, "Invalid fxp string: %s", str); + return result; +} + +static void +expect_parse_accurate(const char *str, const char *parse_str) { + double true_val = strtod(str, NULL); + fxp_t fxp_val; + char *end; + bool err = fxp_parse(&fxp_val, parse_str, &end); + expect_false(err, "Unexpected parse failure"); + expect_ptr_eq(parse_str + strlen(str), end, + "Didn't parse whole string"); + expect_true(double_close(fxp2double(fxp_val), true_val), + "Misparsed %s", str); +} + +static void +parse_valid_trial(const char *str) { + /* The value it parses should be correct. 
*/ + expect_parse_accurate(str, str); + char buf[100]; + snprintf(buf, sizeof(buf), "%swith_some_trailing_text", str); + expect_parse_accurate(str, buf); + snprintf(buf, sizeof(buf), "%s with a space", str); + expect_parse_accurate(str, buf); + snprintf(buf, sizeof(buf), "%s,in_a_malloc_conf_string:1", str); + expect_parse_accurate(str, buf); +} + +TEST_BEGIN(test_parse_valid) { + parse_valid_trial("0"); + parse_valid_trial("1"); + parse_valid_trial("2"); + parse_valid_trial("100"); + parse_valid_trial("345"); + parse_valid_trial("00000000123"); + parse_valid_trial("00000000987"); + + parse_valid_trial("0.0"); + parse_valid_trial("0.00000000000456456456"); + parse_valid_trial("100.00000000000456456456"); + + parse_valid_trial("123.1"); + parse_valid_trial("123.01"); + parse_valid_trial("123.001"); + parse_valid_trial("123.0001"); + parse_valid_trial("123.00001"); + parse_valid_trial("123.000001"); + parse_valid_trial("123.0000001"); + + parse_valid_trial(".0"); + parse_valid_trial(".1"); + parse_valid_trial(".01"); + parse_valid_trial(".001"); + parse_valid_trial(".0001"); + parse_valid_trial(".00001"); + parse_valid_trial(".000001"); + + parse_valid_trial(".1"); + parse_valid_trial(".10"); + parse_valid_trial(".100"); + parse_valid_trial(".1000"); + parse_valid_trial(".100000"); +} +TEST_END + +static void expect_parse_failure(const char *str) { + fxp_t result = FXP_INIT_INT(333); + char *end = (void *)0x123; + bool err = fxp_parse(&result, str, &end); + expect_true(err, "Expected a parse error on: %s", str); + expect_ptr_eq((void *)0x123, end, + "Parse error shouldn't change results"); + expect_u32_eq(result, FXP_INIT_INT(333), + "Parse error shouldn't change results"); +} + +TEST_BEGIN(test_parse_invalid) { + expect_parse_failure("123."); + expect_parse_failure("3.a"); + expect_parse_failure(".a"); + expect_parse_failure("a.1"); + expect_parse_failure("a"); + /* A valid string, but one that overflows. 
*/ + expect_parse_failure("123456789"); + expect_parse_failure("0000000123456789"); + expect_parse_failure("1000000"); +} +TEST_END + +static void +expect_add(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_add(a, b), result), + "Expected %s + %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_add_simple) { + expect_add("0", "0", "0"); + expect_add("0", "1", "1"); + expect_add("1", "1", "2"); + expect_add("1.5", "1.5", "3"); + expect_add("0.1", "0.1", "0.2"); + expect_add("123", "456", "579"); +} +TEST_END + +static void +expect_sub(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_sub(a, b), result), + "Expected %s - %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_sub_simple) { + expect_sub("0", "0", "0"); + expect_sub("1", "0", "1"); + expect_sub("1", "1", "0"); + expect_sub("3.5", "1.5", "2"); + expect_sub("0.3", "0.1", "0.2"); + expect_sub("456", "123", "333"); +} +TEST_END + +static void +expect_mul(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_mul(a, b), result), + "Expected %s * %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_mul_simple) { + expect_mul("0", "0", "0"); + expect_mul("1", "0", "0"); + expect_mul("1", "1", "1"); + expect_mul("1.5", "1.5", "2.25"); + expect_mul("100.0", "10", "1000"); + expect_mul(".1", "10", "1"); +} +TEST_END + +static void +expect_div(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_div(a, b), result), + "Expected %s / %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_div_simple) { + expect_div("1", "1", "1"); + expect_div("0", "1", "0"); + expect_div("2", "1", "2"); + expect_div("3", "2", "1.5"); + expect_div("3", "1.5", "2"); + expect_div("10", ".1", "100"); + expect_div("123", "456", ".2697368421"); +} +TEST_END + +static void +expect_round(const char *str, uint32_t rounded_down, uint32_t rounded_nearest) { + fxp_t fxp = xparse_fxp(str); + uint32_t fxp_rounded_down = fxp_round_down(fxp); + uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp); + expect_u32_eq(rounded_down, fxp_rounded_down, + "Mistake rounding %s down", str); + expect_u32_eq(rounded_nearest, fxp_rounded_nearest, + "Mistake rounding %s to nearest", str); +} + +TEST_BEGIN(test_round_simple) { + expect_round("1.5", 1, 2); + expect_round("0", 0, 0); + expect_round("0.1", 0, 0); + expect_round("0.4", 0, 0); + expect_round("0.40000", 0, 0); + expect_round("0.5", 0, 1); + expect_round("0.6", 0, 1); + expect_round("123", 123, 123); + expect_round("123.4", 123, 123); + expect_round("123.5", 123, 124); +} +TEST_END + +static void +expect_print(const char *str) { + fxp_t fxp = xparse_fxp(str); + char buf[FXP_BUF_SIZE]; + fxp_print(fxp, buf); + expect_d_eq(0, strcmp(str, buf), "Couldn't round-trip print %s", str); +} + +TEST_BEGIN(test_print_simple) { + expect_print("0.0"); + expect_print("1.0"); + expect_print("2.0"); + expect_print("123.0"); + /* + * We hit the possibility of roundoff errors whenever the fractional + * component isn't a round binary number; only check these here (we + * round-trip properly in 
the stress test). + */ + expect_print("1.5"); + expect_print("3.375"); + expect_print("0.25"); + expect_print("0.125"); + /* 1 / 2**14 */ + expect_print("0.00006103515625"); +} +TEST_END + +TEST_BEGIN(test_stress) { + const char *numbers[] = { + "0.0", "0.1", "0.2", "0.3", "0.4", + "0.5", "0.6", "0.7", "0.8", "0.9", + + "1.0", "1.1", "1.2", "1.3", "1.4", + "1.5", "1.6", "1.7", "1.8", "1.9", + + "2.0", "2.1", "2.2", "2.3", "2.4", + "2.5", "2.6", "2.7", "2.8", "2.9", + + "17.0", "17.1", "17.2", "17.3", "17.4", + "17.5", "17.6", "17.7", "17.8", "17.9", + + "18.0", "18.1", "18.2", "18.3", "18.4", + "18.5", "18.6", "18.7", "18.8", "18.9", + + "123.0", "123.1", "123.2", "123.3", "123.4", + "123.5", "123.6", "123.7", "123.8", "123.9", + + "124.0", "124.1", "124.2", "124.3", "124.4", + "124.5", "124.6", "124.7", "124.8", "124.9", + + "125.0", "125.1", "125.2", "125.3", "125.4", + "125.5", "125.6", "125.7", "125.8", "125.9"}; + size_t numbers_len = sizeof(numbers)/sizeof(numbers[0]); + for (size_t i = 0; i < numbers_len; i++) { + fxp_t fxp_a = xparse_fxp(numbers[i]); + double double_a = strtod(numbers[i], NULL); + + uint32_t fxp_rounded_down = fxp_round_down(fxp_a); + uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp_a); + uint32_t double_rounded_down = (uint32_t)double_a; + uint32_t double_rounded_nearest = (uint32_t)round(double_a); + + expect_u32_eq(double_rounded_down, fxp_rounded_down, + "Incorrectly rounded down %s", numbers[i]); + expect_u32_eq(double_rounded_nearest, fxp_rounded_nearest, + "Incorrectly rounded-to-nearest %s", numbers[i]); + + for (size_t j = 0; j < numbers_len; j++) { + fxp_t fxp_b = xparse_fxp(numbers[j]); + double double_b = strtod(numbers[j], NULL); + + fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); + double double_sum = double_a + double_b; + expect_true( + double_close(fxp2double(fxp_sum), double_sum), + "Miscomputed %s + %s", numbers[i], numbers[j]); + + if (double_a > double_b) { + fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); + double double_diff = double_a - double_b; + expect_true( + double_close(fxp2double(fxp_diff), + double_diff), + "Miscomputed %s - %s", numbers[i], + numbers[j]); + } + + fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); + double double_prod = double_a * double_b; + expect_true( + double_close(fxp2double(fxp_prod), double_prod), + "Miscomputed %s * %s", numbers[i], numbers[j]); + + if (double_b != 0.0) { + fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); + double double_quot = double_a / double_b; + expect_true( + double_close(fxp2double(fxp_quot), + double_quot), + "Miscomputed %s / %s", numbers[i], + numbers[j]); + } + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_parse_valid, + test_parse_invalid, + test_add_simple, + test_sub_simple, + test_mul_simple, + test_div_simple, + test_round_simple, + test_print_simple, + test_stress); +} -- cgit v0.12 From d438296b1fbb898653b9f3f454f3f84b33d30986 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 1 Dec 2020 13:13:55 -0800 Subject: narenas_ratio: Accept fractional values. With recent scalability improvements to the HPA, we're experimenting with much lower arena counts; this gets annoying when trying to test across different hardware configurations using only the narenas setting. 
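The ratio is parsed with fxp_parse() and applied with fxp_mul(), rounding to
the nearest integer and falling back to a single arena if the result rounds
to zero.  As a rough standalone sketch of that arithmetic (plain C that
mirrors, rather than reuses, the fxp helpers; the CPU count and ratio are
illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint32_t ncpus = 16;
        /* 0.75 in 16.16 fixed point, as "narenas_ratio:0.75" would parse. */
        uint32_t ratio = (3U << 16) / 4;
        /* fxp_mul: multiply, then shift the extra 2**16 factor back out. */
        uint64_t goal = ((uint64_t)(ncpus << 16) * ratio) >> 16;
        /* fxp_round_nearest: round half up on the low 16 bits. */
        uint32_t narenas = (uint32_t)(goal >> 16)
            + (((uint32_t)goal & 0xffff) >= (1U << 15));
        if (narenas == 0) {
            narenas = 1;
        }
        printf("%u CPUs * 0.75 -> %u arenas\n", ncpus, narenas);
        return 0;
    }

So e.g. MALLOC_CONF="narenas_ratio:0.75" on a 16-CPU machine yields 12 arenas
by default, while a ratio small enough to round to zero still gets one arena.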
--- src/jemalloc.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a8db83..74240c0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" @@ -127,7 +128,7 @@ bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; -unsigned opt_narenas_ratio = 4; +fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; @@ -1312,10 +1313,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (CONF_MATCH("narenas_ratio")) { - CONF_HANDLE_UNSIGNED(opt_narenas_ratio, - "narenas_ratio", 1, UINT_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) + char *end; + bool err = fxp_parse(&opt_narenas_ratio, v, + &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; @@ -1877,7 +1882,13 @@ malloc_narenas_default(void) { * default. */ if (ncpus > 1) { - return ncpus * opt_narenas_ratio; + fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); + fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); + uint32_t int_goal = fxp_round_nearest(goal); + if (int_goal == 0) { + return 1; + } + return int_goal; } else { return 1; } -- cgit v0.12 From d0a991d47b2717ac6abe6a7d8adc52c967ecd115 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Nov 2020 17:24:31 -0800 Subject: psset: Add insert/remove functions. These will allow us to (for instance) move pageslabs from a psset dedicated to not-yet-hugeified pages to one dedicated to hugeified ones. --- include/jemalloc/internal/psset.h | 3 + src/psset.c | 62 ++++++++++++++----- test/unit/psset.c | 121 ++++++++++++++++++++++++++++++-------- 3 files changed, 144 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 1431123..4b0c4da 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -59,6 +59,9 @@ struct psset_s { void psset_init(psset_t *psset); +void psset_insert(psset_t *psset, edata_t *ps); +void psset_remove(psset_t *psset, edata_t *ps); + /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/src/psset.c b/src/psset.c index 9fc7ec1..cd0dcae 100644 --- a/src/psset.c +++ b/src/psset.c @@ -65,6 +65,51 @@ psset_assert_ps_consistent(edata_t *ps) { edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); } +void +psset_insert(psset_t *psset, edata_t *ps) { + psset_assert_ps_consistent(ps); + size_t longest_free_range = edata_longest_free_range_get(ps); + + if (longest_free_range == 0) { + /* + * We don't ned to track full slabs; just pretend to for stats + * purposes. See the comment at psset_bin_stats_adjust. 
+ */ + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + return; + } + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + + assert(pind < PSSET_NPSIZES); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } + psset_edata_heap_insert(psset, pind, ps); +} + +void +psset_remove(psset_t *psset, edata_t *ps) { + psset_assert_ps_consistent(ps); + size_t longest_free_range = edata_longest_free_range_get(ps); + + if (longest_free_range == 0) { + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + return; + } + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + psset_edata_heap_remove(psset, pind, ps); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } +} + /* * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. @@ -91,21 +136,6 @@ psset_recycle_extract(psset_t *psset, size_t size) { return ps; } -static void -psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { - psset_assert_ps_consistent(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - largest_range << LG_PAGE)); - - assert(pind < PSSET_NPSIZES); - - if (edata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); - } - psset_edata_heap_insert(psset, pind, ps); -} - /* * Given a pageslab ps and an edata to allocate size bytes from, initializes the * edata with a range in the pageslab, and puts ps back in the set. @@ -187,7 +217,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, psset_bin_stats_adjust(&psset->full_slab_stats, ps, /* inc */ true); } else { - psset_insert(psset, ps, largest_unchosen_range); + psset_insert(psset, ps); } } diff --git a/test/unit/psset.c b/test/unit/psset.c index 861903d..e734ec8 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -360,23 +360,37 @@ TEST_BEGIN(test_stats) { expect_false(err, "Nonempty psset failed page allocation."); } stats_expect(&psset, PAGESLAB_PAGES); + edata_t *ps; for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { - edata_t *ps = psset_dalloc(&psset, &alloc[i]); + ps = psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), "psset_dalloc should only evict a slab on the last free"); stats_expect(&psset, i); } + + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + stats_expect(&psset, 1); + psset_remove(&psset, &pageslab); + stats_expect(&psset, 0); + psset_insert(&psset, &pageslab); + stats_expect(&psset, 1); } TEST_END -TEST_BEGIN(test_oldest_fit) { +/* + * Fills in and inserts two pageslabs, with the first better than the second, + * and each fully allocated (into the allocations in allocs and worse_allocs, + * each of which should be PAGESLAB_PAGES long). + * + * (There's nothing magic about these numbers; it's just useful to share the + * setup between the oldest fit and the insert/remove test). 
+ */ +static void +init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, + edata_t *alloc, edata_t *worse_alloc) { bool err; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; - - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_init(&pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), + memset(pageslab, 0, sizeof(*pageslab)); + edata_init(pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, extent_state_active, /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); @@ -386,29 +400,27 @@ TEST_BEGIN(test_oldest_fit) { * added to the set after the previous one, and so should be less * preferred for allocations. */ - edata_t worse_pageslab; - memset(&worse_pageslab, 0, sizeof(pageslab)); - edata_init(&worse_pageslab, /* arena_ind */ 0, + memset(worse_pageslab, 0, sizeof(*worse_pageslab)); + edata_init(worse_pageslab, /* arena_ind */ 0, (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); - psset_t psset; - psset_init(&psset); + psset_init(psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + psset_alloc_new(psset, pageslab, &alloc[0], PAGE); for (size_t i = 1; i < PAGESLAB_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); - expect_ptr_eq(&pageslab, edata_ps_get(&alloc[i]), + expect_ptr_eq(pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } edata_init_test(&worse_alloc[0]); - psset_alloc_new(&psset, &worse_pageslab, &worse_alloc[0], PAGE); - expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[0]), + psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); + expect_ptr_eq(worse_pageslab, edata_ps_get(&worse_alloc[0]), "Allocated from the wrong pageslab"); /* * Make the two pssets otherwise indistinguishable; all full except for @@ -416,20 +428,31 @@ TEST_BEGIN(test_oldest_fit) { */ for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); - expect_ptr_eq(&worse_pageslab, edata_ps_get(&alloc[i]), + expect_ptr_eq(worse_pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } /* Deallocate the last page from the older pageslab. */ - edata_t *evicted = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + edata_t *evicted = psset_dalloc(psset, &alloc[PAGESLAB_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); +} - /* - * This edata is the whole purpose for the test; it should come from the - * older pageslab. - */ +TEST_BEGIN(test_oldest_fit) { + bool err; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + edata_t worse_pageslab; + + psset_t psset; + + init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, + worse_alloc); + + /* The edata should come from the better pageslab. 
*/ edata_t test_edata; edata_init_test(&test_edata); err = psset_alloc_reuse(&psset, &test_edata, PAGE); @@ -439,6 +462,51 @@ TEST_BEGIN(test_oldest_fit) { } TEST_END +TEST_BEGIN(test_insert_remove) { + bool err; + edata_t *ps; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + edata_t worse_pageslab; + + psset_t psset; + + init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, + worse_alloc); + + /* Remove better; should still be able to alloc from worse. */ + psset_remove(&psset, &pageslab); + err = psset_alloc_reuse(&psset, &worse_alloc[PAGESLAB_PAGES - 1], PAGE); + expect_false(err, "Removal should still leave an empty page"); + expect_ptr_eq(&worse_pageslab, + edata_ps_get(&worse_alloc[PAGESLAB_PAGES - 1]), + "Allocated out of wrong ps"); + + /* + * After deallocating the previous alloc and reinserting better, it + * should be preferred for future allocations. + */ + ps = psset_dalloc(&psset, &worse_alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); + psset_insert(&psset, &pageslab); + err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + expect_false(err, "psset should be nonempty"); + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[PAGESLAB_PAGES - 1]), + "Removal/reinsertion shouldn't change ordering"); + /* + * After deallocating and removing both, allocations should fail. + */ + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Incorrect eviction"); + psset_remove(&psset, &pageslab); + psset_remove(&psset, &worse_pageslab); + err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + expect_true(err, "psset should be empty, but an alloc succeeded"); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -448,5 +516,6 @@ main(void) { test_evict, test_multi_pageslab, test_stats, - test_oldest_fit); + test_oldest_fit, + test_insert_remove); } -- cgit v0.12 From c1b2a77933135ebefa62a5ec4c7d9efa94b14592 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Nov 2020 16:23:03 -0800 Subject: psset: Move in stats. A later change will benefit from having these functions pulled into a psset-module set of functions. --- include/jemalloc/internal/hpa.h | 6 ++++-- include/jemalloc/internal/psset.h | 24 +++++++++++----------- src/ctl.c | 21 +++++++------------ src/hpa.c | 22 ++++++++++++++++++-- src/pa_extra.c | 10 +-------- src/psset.c | 43 ++++++++++++++++++++++++--------------- test/unit/psset.c | 19 +++++++++-------- 7 files changed, 82 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 159f0d0..12a7a17 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -35,8 +35,7 @@ struct hpa_s { /* Used only by CTL; not actually stored here (i.e., all derived). 
*/ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { - psset_bin_stats_t psset_full_slab_stats; - psset_bin_stats_t psset_slab_stats[PSSET_NPSIZES]; + psset_stats_t psset_stats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -89,6 +88,9 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, size_t large_min); + +void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); +void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to * the possibility of races, we don't actually prevent allocations; just flush diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 4b0c4da..4529827 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -31,12 +31,16 @@ struct psset_bin_stats_s { size_t ninactive; }; -static inline void -psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs += src->npageslabs; - dst->nactive += src->nactive; - dst->ninactive += src->ninactive; -} +/* Used only by CTL; not actually stored here (i.e., all derived). */ +typedef struct psset_stats_s psset_stats_t; +struct psset_stats_s { + /* + * Full slabs don't live in any edata heap. But we still track their + * stats. + */ + psset_bin_stats_t full_slabs; + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES]; +}; typedef struct psset_s psset_t; struct psset_s { @@ -46,18 +50,14 @@ struct psset_s { */ edata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; - /* - * Full slabs don't live in any edata heap. But we still track their - * stats. - */ - psset_bin_stats_t full_slab_stats; - psset_bin_stats_t slab_stats[PSSET_NPSIZES]; + psset_stats_t stats; /* How many alloc_new calls have happened? */ uint64_t age_counter; }; void psset_init(psset_t *psset); +void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps); diff --git a/src/ctl.c b/src/ctl.c index 4bb422a..f0df73b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1104,14 +1104,7 @@ MUTEX_PROF_ARENA_MUTEXES } /* Merge HPA stats. 
*/ - psset_bin_stats_accum(&sdstats->hpastats.psset_full_slab_stats, - &astats->hpastats.psset_full_slab_stats); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum( - &sdstats->hpastats.psset_slab_stats[i], - &astats->hpastats.psset_slab_stats[i]); - } - + hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -3375,21 +3368,21 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.ninactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].nactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].ninactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, size_t); static const ctl_named_node_t * diff --git a/src/hpa.c b/src/hpa.c index 8029e0b..e7548ad 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -89,6 +89,24 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, return false; } +/* + * Note that the stats functions here follow the usual stats naming conventions; + * "merge" obtains the stats from some live object of instance, while "accum" + * only combines the stats from one stats objet to another. Hence the lack of + * locking here. 
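+ * (Concretely: hpa_stats_merge() below grabs the shard mutex before reading
+ * the live psset stats, while hpa_stats_accum() is plain arithmetic on two
+ * already-extracted stats structs and so needs no lock.)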
+ */ +void +hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { + psset_stats_accum(&dst->psset_stats, &src->psset_stats); +} + +void +hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + malloc_mutex_lock(tsdn, &shard->mtx); + psset_stats_accum(&dst->psset_stats, &shard->psset.stats); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + static edata_t * hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, size_t size_goal) { @@ -415,10 +433,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { PAGE); malloc_mutex_unlock(tsdn, &shard->mtx); assert(psset_empty); - hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats); + hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { hpa_shard_assert_stats_empty( - &shard->psset.slab_stats[i]); + &shard->psset.stats.nonfull_slabs[i]); } } } diff --git a/src/pa_extra.c b/src/pa_extra.c index 24cb653..2002418 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -150,15 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, } if (shard->ever_used_hpa) { - malloc_mutex_lock(tsdn, &shard->hpa_shard.mtx); - psset_bin_stats_accum(&hpa_stats_out->psset_full_slab_stats, - &shard->hpa_shard.psset.full_slab_stats); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum( - &hpa_stats_out->psset_slab_stats[i], - &shard->hpa_shard.psset.slab_stats[i]); - } - malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } diff --git a/src/psset.c b/src/psset.c index cd0dcae..c24266c 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,17 +14,26 @@ psset_init(psset_t *psset) { edata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); - psset->full_slab_stats.npageslabs = 0; - psset->full_slab_stats.nactive = 0; - psset->full_slab_stats.ninactive = 0; - for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - psset->slab_stats[i].npageslabs = 0; - psset->slab_stats[i].nactive = 0; - psset->slab_stats[i].ninactive = 0; - } + memset(&psset->stats, 0, sizeof(psset->stats)); psset->age_counter = 0; } +static void +psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; +} + +void +psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { + psset_bin_stats_accum(&dst->full_slabs, &src->full_slabs); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum(&dst->nonfull_slabs[i], + &src->nonfull_slabs[i]); + } +} + /* * The stats maintenance strategy is simple, but not necessarily obvious. * edata_nfree and the bitmap must remain consistent at all times. 
If they @@ -50,13 +59,15 @@ psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); + psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, + /* inc */ false); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); + psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, + /* inc */ true); } JEMALLOC_ALWAYS_INLINE void @@ -75,7 +86,7 @@ psset_insert(psset_t *psset, edata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. */ - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); return; } @@ -96,7 +107,7 @@ psset_remove(psset_t *psset, edata_t *ps) { size_t longest_free_range = edata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); return; } @@ -214,7 +225,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); } else { psset_insert(psset, ps); @@ -265,15 +276,15 @@ psset_dalloc(psset_t *psset, edata_t *edata) { fb_unset_range(ps_fb, ps_npages, begin, len); if (ps_old_longest_free_range == 0) { /* We were in the (imaginary) full bin; update stats for it. */ - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ false); } else { /* * The edata is still in the bin, need to update its * contribution. 
*/ - psset->slab_stats[old_pind].nactive -= len; - psset->slab_stats[old_pind].ninactive += len; + psset->stats.nonfull_slabs[old_pind].nactive -= len; + psset->stats.nonfull_slabs[old_pind].ninactive += len; } /* * Note that we want to do this after the stats updates, since if it was diff --git a/test/unit/psset.c b/test/unit/psset.c index e734ec8..e07bdc4 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -307,14 +307,14 @@ stats_expect_empty(psset_bin_stats_t *stats) { static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == PAGESLAB_PAGES) { - expect_zu_eq(1, psset->full_slab_stats.npageslabs, + expect_zu_eq(1, psset->stats.full_slabs.npageslabs, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, psset->full_slab_stats.nactive, + expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->full_slab_stats.ninactive, + expect_zu_eq(0, psset->stats.full_slabs.ninactive, "Should never have inactive pages in a full slab"); } else { - stats_expect_empty(&psset->full_slab_stats); + stats_expect_empty(&psset->stats.full_slabs); } size_t ninactive = PAGESLAB_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; @@ -324,14 +324,17 @@ stats_expect(psset_t *psset, size_t nactive) { } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { - assert_zu_eq(1, psset->slab_stats[i].npageslabs, + assert_zu_eq(1, + psset->stats.nonfull_slabs[i].npageslabs, "Should have found a slab"); - expect_zu_eq(nactive, psset->slab_stats[i].nactive, + expect_zu_eq(nactive, + psset->stats.nonfull_slabs[i].nactive, "Mismatch in active pages"); - expect_zu_eq(ninactive, psset->slab_stats[i].ninactive, + expect_zu_eq(ninactive, + psset->stats.nonfull_slabs[i].ninactive, "Mismatch in inactive pages"); } else { - stats_expect_empty(&psset->slab_stats[i]); + stats_expect_empty(&psset->stats.nonfull_slabs[i]); } } } -- cgit v0.12 From 63677dde631e089c4dc00b6cca5e6e03ac9fdc90 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 09:10:15 -0800 Subject: Pages: Statically detect if pages_huge may succeed --- include/jemalloc/internal/pages.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 7dae633..cfaa0fc 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,6 +58,18 @@ static const bool pages_can_purge_forced = #endif ; +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL) +# define PAGES_CAN_HUGIFY +#endif + +static const bool pages_can_hugify = +#ifdef PAGES_CAN_HUGIFY + true +#else + false +#endif + ; + typedef enum { thp_mode_default = 0, /* Do not change hugepage settings. */ thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ -- cgit v0.12 From 43af63fff496967bf2173c92737aea1cca4ca025 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Nov 2020 13:49:30 -0800 Subject: HPA: Manage whole hugepages at a time. This redesigns the HPA implementation to allow us to manage hugepages all at once, locally, without relying on a global fallback. 
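One user-visible consequence is that the psset bin stats (and the
corresponding "stats.arenas.<i>.hpa_shard.*" mallctl names) are split into
*_huge and *_nonhuge variants, keyed off whether the backing pageslab is
currently backed by a hugepage.  A hypothetical, self-contained sketch of
that style of bookkeeping (the names and exact shape here are illustrative,
not taken from the patch):

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct {
        size_t npageslabs_huge, npageslabs_nonhuge;
        size_t nactive_huge, nactive_nonhuge;
        size_t ninactive_huge, ninactive_nonhuge;
    } bin_stats_sketch_t;

    /* Add (inc == true) or subtract one slab's worth of pages. */
    static void
    bin_stats_sketch_adjust(bin_stats_sketch_t *s, bool hugeified, bool inc,
        size_t nactive, size_t ninactive) {
        /* Relies on unsigned wraparound to subtract when inc is false. */
        size_t sign = inc ? (size_t)1 : (size_t)-1;
        if (hugeified) {
            s->npageslabs_huge += sign;
            s->nactive_huge += sign * nactive;
            s->ninactive_huge += sign * ninactive;
        } else {
            s->npageslabs_nonhuge += sign;
            s->nactive_nonhuge += sign * nactive;
            s->ninactive_nonhuge += sign * ninactive;
        }
    }

    int main(void) {
        bin_stats_sketch_t s = {0};
        /* One hugeified slab: 400 active pages, 112 inactive. */
        bin_stats_sketch_adjust(&s, /* hugeified */ true, /* inc */ true,
            400, 112);
        return (int)s.npageslabs_huge - 1;  /* exits 0 on success */
    }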
--- include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/edata.h | 18 +- include/jemalloc/internal/hpa.h | 93 ++--- include/jemalloc/internal/mutex_prof.h | 4 +- include/jemalloc/internal/pa.h | 5 +- include/jemalloc/internal/psset.h | 11 +- src/arena.c | 6 +- src/ctl.c | 115 +++--- src/hpa.c | 593 ++++++++++++++++++------------ src/jemalloc.c | 37 +- src/pa.c | 15 +- src/pa_extra.c | 2 +- src/psset.c | 120 ++++-- src/stats.c | 115 ++++-- test/unit/hpa.c | 76 +--- test/unit/psset.c | 21 +- 16 files changed, 691 insertions(+), 541 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 40223b5..e3cfcee 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,7 +16,6 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; -extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 5ec12be..465c962 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -208,9 +208,9 @@ struct edata_s { */ /* - * If this edata is from an HPA, it may be part of some larger - * pageslab. Track it if so. Otherwise (either because it's - * not part of a pageslab, or not from the HPA at all), NULL. + * If this edata is a user allocation from an HPA, it comes out + * of some pageslab (we don't yet support huegpage allocations + * that don't fit into pageslabs). This tracks it. */ edata_t *ps; /* @@ -225,6 +225,8 @@ struct edata_s { * between heaps. */ uint32_t longest_free_range; + /* Whether or not the slab is backed by a hugepage. */ + bool hugeified; }; }; @@ -329,6 +331,11 @@ edata_pai_get(const edata_t *edata) { } static inline bool +edata_hugeified_get(const edata_t *edata) { + return edata->hugeified; +} + +static inline bool edata_slab_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> EDATA_BITS_SLAB_SHIFT); @@ -560,6 +567,11 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) { } static inline void +edata_hugeified_set(edata_t *edata, bool hugeified) { + edata->hugeified = hugeified; +} + +static inline void edata_slab_set(edata_t *edata, bool slab) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 12a7a17..1c4585d 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,32 +6,6 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -typedef struct hpa_s hpa_t; -struct hpa_s { - /* - * We have two mutexes for the central allocator; mtx protects its - * state, while grow_mtx protects controls the ability to grow the - * backing store. This prevents race conditions in which the central - * allocator has exhausted its memory while mutiple threads are trying - * to allocate. If they all reserved more address space from the OS - * without synchronization, we'd end consuming much more than necessary. - */ - malloc_mutex_t grow_mtx; - malloc_mutex_t mtx; - hpa_central_t central; - /* The arena ind we're associated with. */ - unsigned ind; - /* - * This edata cache is the global one that we use for new allocations in - * growing; practically, it comes from a0. 
- * - * We don't use an edata_cache_small in front of this, since we expect a - * small finite number of allocations from it. - */ - edata_cache_t *edata_cache; - exp_grow_t exp_grow; -}; - /* Used only by CTL; not actually stored here (i.e., all derived). */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { @@ -53,44 +27,53 @@ struct hpa_shard_s { * allocator, and so will use its edata_cache. */ edata_cache_small_t ecs; - hpa_t *hpa; + psset_t psset; /* - * When we're grabbing a new ps from the central allocator, how big - * would we like it to be? This is mostly about the level of batching - * we use in our requests to the centralized allocator. + * The largest size we'll allocate out of the shard. For those + * allocations refused, the caller (in practice, the PA module) will + * fall back to the more general (for now) PAC, which can always handle + * any allocation request. */ - size_t ps_goal; - /* - * What's the maximum size we'll try to allocate out of the psset? We - * don't want this to be too large relative to ps_goal, as a - * fragmentation avoidance measure. - */ - size_t ps_alloc_max; + size_t alloc_max; + /* - * What's the maximum size we'll try to allocate out of the shard at - * all? + * Slabs currently purged away. They are hugepage-sized and + * hugepage-aligned, but have had pages_nohuge and pages_purge_forced + * called on them. + * + * Guarded by grow_mtx. */ - size_t small_max; + edata_list_inactive_t unused_slabs; + /* - * What's the minimum size for which we'll go straight to the global - * arena? + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. */ - size_t large_min; + edata_t *eden; /* The arena ind we're associated with. */ unsigned ind; + emap_t *emap; }; -bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, - edata_cache_t *edata_cache); -bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, - edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min); +/* + * Whether or not the HPA can be used given the current configuration. This is + * is not necessarily a guarantee that it backs its allocations by hugepages, + * just that it can function properly given the system it's running on. + */ +bool hpa_supported(); +bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, + edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); + +void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); +void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst); -void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); -void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to * the possibility of races, we don't actually prevent allocations; just flush @@ -108,14 +91,4 @@ void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); -/* - * These should be acquired after all the shard locks in phase 4, but before any - * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), - * so it needs to be lower in the witness ordering, but it's also logically - * global and not tied to any particular arena. 
- */ -void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); - #endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index ef0bf0d..3759daa 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,9 +11,7 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) \ - OP(hpa_central) \ - OP(hpa_central_grow) + OP(prof_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f1823e6..b903022 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,9 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, - size_t sec_alloc_max, size_t sec_bytes_max); +bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 4529827..3c9f23b 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -24,11 +24,14 @@ typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ - size_t npageslabs; + size_t npageslabs_huge; + size_t npageslabs_nonhuge; /* Of them, how many pages are active? */ - size_t nactive; + size_t nactive_huge; + size_t nactive_nonhuge; /* How many are inactive? */ - size_t ninactive; + size_t ninactive_huge; + size_t ninactive_nonhuge; }; /* Used only by CTL; not actually stored here (i.e., all derived). */ @@ -62,6 +65,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps); +void psset_hugify(psset_t *psset, edata_t *ps); + /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/src/arena.c b/src/arena.c index 7099713..209eb34 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,7 +37,6 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; -hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1535,9 +1534,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + if (pa_shard_enable_hpa(&arena->pa_shard, + opt_hpa_slab_max_alloc, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index f0df73b..88cee66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -220,13 +220,19 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -606,21 +612,33 @@ MUTEX_PROF_ARENA_MUTEXES }; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)}, }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + 
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -1104,7 +1122,7 @@ MUTEX_PROF_ARENA_MUTEXES } /* Merge HPA stats. */ - hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); + hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -1219,14 +1237,6 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); } - if (opt_hpa) { - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central, - arena_hpa_global.mtx); - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central_grow, - arena_hpa_global.grow_mtx); - } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_background_thread, @@ -3259,11 +3269,6 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); } - if (opt_hpa) { - MUTEX_PROF_RESET(arena_hpa_global.mtx); - MUTEX_PROF_RESET(arena_hpa_global.grow_mtx); - } - /* Per arena mutexes. */ unsigned n = narenas_total_get(); @@ -3367,22 +3372,44 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, +/* Full, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t); + +/* Full, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t); + +/* Nonfull, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); 
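
Since the CTL names change shape here, a quick consumer-side sketch of how the new statistics are read (this assumes a stats-enabled build with the HPA turned on; depending on the build the public symbols may carry a je_ prefix, and real code should check the mallctl return values):

	#include <jemalloc/jemalloc.h>

	uint64_t epoch = 1;
	size_t sz = sizeof(epoch);
	mallctl("epoch", &epoch, &sz, &epoch, sz);	/* refresh cached stats */

	size_t npageslabs_huge;
	sz = sizeof(npageslabs_huge);
	mallctl("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge",
	    &npageslabs_huge, &sz, NULL, 0);

Each of the six per-bin counters (npageslabs/nactive/ninactive, huge and nonhuge) is reachable the same way, and the nonfull bins add a size-class index, e.g. stats.arenas.0.hpa_shard.nonfull_slabs.3.nactive_nonhuge.
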
-CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, +/* Nonfull, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge, size_t); static const ctl_named_node_t * diff --git a/src/hpa.c b/src/hpa.c index e7548ad..ca75628 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -6,6 +6,8 @@ #include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/witness.h" +#define HPA_EDEN_SIZE (128 * HUGEPAGE) + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -15,43 +17,40 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); bool -hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { - bool err; - +hpa_supported() { +#ifdef _WIN32 /* - * We fundamentally rely on a address-space-hungry growth strategy for - * hugepages. This may change in the future, but for now we should have - * refused to turn on any HPA at a higher level of the stack. + * At least until the API and implementation is somewhat settled, we + * don't want to try to debug the VM subsystem on the hardest-to-test + * platform. */ - assert(LG_SIZEOF_PTR == 3); - - err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, - malloc_mutex_rank_exclusive); - if (err) { - return true; + return false; +#endif + if (!pages_can_hugify) { + return false; } - err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, - malloc_mutex_rank_exclusive); - if (err) { - return true; + /* + * We fundamentally rely on a address-space-hungry growth strategy for + * hugepages. + */ + if (LG_SIZEOF_PTR == 2) { + return false; } - - hpa_central_init(&hpa->central, edata_cache, emap); - if (err) { - return true; + /* + * We use the edata bitmap; it needs to have at least as many bits as a + * hugepage has pages. + */ + if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) { + return false; } - hpa->ind = base_ind_get(base); - hpa->edata_cache = edata_cache; - - exp_grow_init(&hpa->exp_grow); - - return false; + return true; } bool -hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, - unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, - size_t large_min) { +hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, + unsigned ind, size_t alloc_max) { + /* malloc_conf processing should have filtered out these cases. 
*/ + assert(hpa_supported()); bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); @@ -66,12 +65,12 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, assert(edata_cache != NULL); edata_cache_small_init(&shard->ecs, edata_cache); - shard->hpa = hpa; psset_init(&shard->psset); - shard->ps_goal = ps_goal; - shard->ps_alloc_max = ps_alloc_max; - shard->small_max = small_max; - shard->large_min = large_min; + shard->alloc_max = alloc_max; + edata_list_inactive_init(&shard->unused_slabs); + shard->eden = NULL; + shard->ind = ind; + shard->emap = emap; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -83,9 +82,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->ind = ind; - assert(ind == base_ind_get(edata_cache->base)); - return false; } @@ -96,176 +92,333 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, * locking here. */ void -hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { +hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); } void -hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { +hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst) { malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); malloc_mutex_unlock(tsdn, &shard->mtx); } -static edata_t * -hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, - size_t size_goal) { - bool err; - edata_t *edata; - - hpa_t *hpa = shard->hpa; - - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - edata_arena_ind_set(edata, shard->ind); - return edata; - } - /* No existing range can satisfy the request; try to grow. */ - malloc_mutex_lock(tsdn, &hpa->grow_mtx); - +static bool +hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) { /* - * We could have raced with other grow attempts; re-check to see if we - * did, and are now able to satisfy the request. + * For now, just use a static check; hugify a page if it's <= 5% + * inactive. Eventually, this should be a malloc conf option. */ - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); - return edata; - } + return !edata_hugeified_get(ps) + && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; +} +/* Returns true on error. */ +static void +hpa_hugify(edata_t *ps) { + assert(edata_size_get(ps) == HUGEPAGE); + assert(edata_hugeified_get(ps)); + bool err = pages_huge(edata_base_get(ps), HUGEPAGE); /* - * No such luck. We've dropped mtx, so other allocations can proceed - * while we allocate the new extent. We know no one else will grow in - * the meantime, though, since we still hold grow_mtx. + * Eat the error; even if the hugeification failed, it's still safe to + * pretend it didn't (and would require extraordinary measures to + * unhugify). 
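
To put concrete numbers on the hpa_should_hugify() check above: assuming the common 2 MiB hugepage and 4 KiB page sizes (an assumption about the platform, not something the patch fixes), HUGEPAGE / PAGE is 512 and (HUGEPAGE / PAGE) * 5 / 100 is 25, so a pageslab becomes a hugification candidate once fewer than 25 of its 512 pages are free, i.e. once more than roughly 95% of it is actively allocated.
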
*/ - size_t alloc_size; - pszind_t skip; + (void)err; +} - size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); +static void +hpa_dehugify(edata_t *ps) { + /* Purge, then dehugify while unbacked. */ + pages_purge_forced(edata_addr_get(ps), HUGEPAGE); + pages_nohuge(edata_addr_get(ps), HUGEPAGE); + edata_hugeified_set(ps, false); +} - err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min, - &alloc_size, &skip); - if (err) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; +static edata_t * +hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + edata_t *ps = NULL; + + /* Is there address space waiting for reuse? */ + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + ps = edata_list_inactive_first(&shard->unused_slabs); + if (ps != NULL) { + edata_list_inactive_remove(&shard->unused_slabs, ps); + return ps; + } + + /* Is eden a perfect fit? */ + if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) { + ps = shard->eden; + shard->eden = NULL; + return ps; } - alloc_size = HUGEPAGE_CEILING(alloc_size); /* - * Eventually, we need to think about this more systematically, and in - * terms of extent hooks. For now, though, we know we only care about - * overcommitting systems, and we're not going to purge much. + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. */ - bool commit = true; - void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); - if (addr == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; + if (shard->eden == NULL) { + /* + * During development, we're primarily concerned with systems + * with overcommit. Eventually, we should be more careful here. + */ + bool commit = true; + /* Allocate address space, bailing if we fail. */ + void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, + &commit); + if (new_eden == NULL) { + return NULL; + } + malloc_mutex_lock(tsdn, &shard->mtx); + /* Allocate ps edata, bailing if we fail. */ + ps = edata_cache_small_get(tsdn, &shard->ecs); + if (ps == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Allocate eden edata, bailing if we fail. */ + shard->eden = edata_cache_small_get(tsdn, &shard->ecs); + if (shard->eden == NULL) { + edata_cache_small_put(tsdn, &shard->ecs, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Success. */ + malloc_mutex_unlock(tsdn, &shard->mtx); + + /* + * Note that the values here don't really make sense (e.g. eden + * is actually zeroed). But we don't use the slab metadata in + * determining subsequent allocation metadata (e.g. zero + * tracking should be done at the per-page level, not at the + * level of the hugepage). It's just a convenient data + * structure that contains much of the helpers we need (defined + * lists, a bitmap, an address field, etc.). Eventually, we'll + * have a "real" representation of a hugepage that's unconnected + * to the edata_ts it will serve allocations into. + */ + edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(shard->eden, false); + } else { + /* Eden is already nonempty; only need an edata for ps. 
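
For scale: with HPA_EDEN_SIZE defined above as 128 * HUGEPAGE, each call that actually has to map fresh memory reserves 256 MiB of address space at a time (again assuming 2 MiB hugepages), and hpa_grow() then carves that eden region into HUGEPAGE-sized pageslabs one at a time as new slabs are needed.
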
*/ + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { + return NULL; + } } - err = pages_huge(addr, alloc_size); /* - * Ignore this for now; even if the allocation fails, the address space - * should still be usable. + * We should have dropped mtx since we're not touching ecs any more, but + * we should continue to hold the grow mutex, since we're about to touch + * eden. */ - (void)err; - - edata = edata_cache_get(tsdn, hpa->edata_cache); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - pages_unmap(addr, alloc_size); + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + + assert(shard->eden != NULL); + assert(edata_size_get(shard->eden) > HUGEPAGE); + assert(edata_size_get(shard->eden) % HUGEPAGE == 0); + assert(edata_addr_get(shard->eden) + == HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden))); + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { return NULL; } + edata_init(ps, edata_arena_ind_get(shard->eden), + edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false, + /* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(ps, false); + edata_addr_set(shard->eden, edata_past_get(ps)); + edata_size_set(shard->eden, + edata_size_get(shard->eden) - HUGEPAGE); + + return ps; +} +/* + * The psset does not hold empty slabs. Upon becoming empty, then, we need to + * put them somewhere. We take this as an opportunity to purge, and retain + * their address space in a list outside the psset. + */ +static void +hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { /* - * The serial number here is just a placeholder; the hpa_central gets to - * decide how it wants to fill it in. - * - * The grow edata is associated with the hpa_central_t arena ind; the - * subsequent allocation we get (in the hpa_central_alloc_grow call - * below) will be filled in with the shard ind. + * We do relatively expensive system calls. The ps was evicted, so no + * one should touch it while we're also touching it. */ - edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, - /* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - malloc_mutex_lock(tsdn, &hpa->mtx); - /* Note that this replace edata with the allocation to return. */ - err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); + assert(edata_size_get(ps) == HUGEPAGE); + assert(HUGEPAGE_ADDR2BASE(edata_addr_get(ps)) == edata_addr_get(ps)); - if (!err) { - exp_grow_size_commit(&hpa->exp_grow, skip); - } - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); - - if (err) { - pages_unmap(addr, alloc_size); - edata_cache_put(tsdn, hpa->edata_cache, edata); - return NULL; - } + /* + * We do this unconditionally, even for pages which were not originally + * hugeified; it has the same effect. 
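
Taking stock of the backing-memory lifecycle this commit sets up (a summary sketch; every step is visible in the hunks above and in hpa_shard_destroy() below):

	/*
	 * pages_map() an eden region -> carve HUGEPAGE-sized pageslabs ->
	 * pageslab lives in the psset (possibly hugified) -> becomes empty ->
	 * pages_purge_forced() + pages_nohuge() -> parked on unused_slabs ->
	 * either reused by hpa_grow(), or pages_unmap()'d in
	 * hpa_shard_destroy().
	 */
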
+ */ + hpa_dehugify(ps); - return edata; + malloc_mutex_lock(tsdn, &shard->grow_mtx); + edata_list_inactive_prepend(&shard->unused_slabs, ps); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); } static edata_t * -hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= shard->ps_alloc_max); - +hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { bool err; malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); + *oom = false; if (edata == NULL) { malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; return NULL; } - edata_arena_ind_set(edata, shard->ind); + assert(edata_arena_ind_get(edata) == shard->ind); err = psset_alloc_reuse(&shard->psset, edata, size); + if (err) { + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + return NULL; + } + /* + * This could theoretically be moved outside of the critical section, + * but that introduces the potential for a race. Without the lock, the + * (initially nonempty, since this is the reuse pathway) pageslab we + * allocated out of could become otherwise empty while the lock is + * dropped. This would force us to deal with a pageslab eviction down + * the error pathway, which is a pain. + */ + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was nonempty before we started; it + * should still be nonempty now, and so shouldn't get + * evicted. + */ + assert(ps == NULL); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; + return NULL; + } + + edata_t *ps = edata_ps_get(edata); + assert(ps != NULL); + bool hugify = hpa_should_hugify(shard, ps); + if (hugify) { + /* + * Do the metadata modification while holding the lock; we'll + * actually change state with the lock dropped. + */ + psset_hugify(&shard->psset, ps); + } malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + if (hugify) { + /* + * Hugifying with the lock dropped is safe, even with + * concurrent modifications to the ps. This relies on + * the fact that the current implementation will never + * dehugify a non-empty pageslab, and ps will never + * become empty before we return edata to the user to be + * freed. + * + * Note that holding the lock would prevent not just operations + * on this page slab, but also operations any other alloc/dalloc + * operations in this hpa shard. + */ + hpa_hugify(ps); + } + return edata; +} + +static edata_t * +hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { + assert(size <= shard->alloc_max); + bool err; + bool oom; + edata_t *edata; + + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL) { return edata; } + /* Nothing in the psset works; we have to grow it. */ malloc_mutex_lock(tsdn, &shard->grow_mtx); - - /* As above; check for grow races. */ - malloc_mutex_lock(tsdn, &shard->mtx); - err = psset_alloc_reuse(&shard->psset, edata, size); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + /* + * Check for grow races; maybe some earlier thread expanded the psset + * in between when we dropped the main mutex and grabbed the grow mutex. 
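
Flattened into pseudocode, the allocation path that hpa_try_alloc_no_grow() and hpa_alloc_psset() implement between them looks like this (a sketch only; the locking and OOM details are in the real code above and below):

	/*
	 * hpa_alloc_psset(shard, size):
	 *   edata = hpa_try_alloc_no_grow(shard, size)   // shard->mtx only
	 *   if (edata != NULL) return edata
	 *   lock(grow_mtx)
	 *   edata = hpa_try_alloc_no_grow(shard, size)   // re-check for races
	 *   if (edata != NULL || oom) { unlock(grow_mtx); return edata }
	 *   grow_edata = hpa_grow(shard)                 // expensive; mtx dropped
	 *   lock(mtx); psset_alloc_new(...); emap_register_boundary(...); unlock(mtx)
	 *   unlock(grow_mtx)
	 */
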
+ */ + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } - edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, - shard->ps_goal); + /* + * Note that we don't hold shard->mtx here (while growing); + * deallocations (and allocations of smaller sizes) may still succeed + * while we're doing this potentially expensive system call. + */ + edata_t *grow_edata = hpa_grow(tsdn, shard); if (grow_edata == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - return NULL; } - edata_arena_ind_set(grow_edata, shard->ind); + assert(edata_arena_ind_get(grow_edata) == shard->ind); + edata_slab_set(grow_edata, true); fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; - fb_init(fb, shard->ps_goal / PAGE); + fb_init(fb, HUGEPAGE / PAGE); /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); + edata = edata_cache_small_get(tsdn, &shard->ecs); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return NULL; + } psset_alloc_new(&shard->psset, grow_edata, edata, size); + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was empty except for the new allocation; it + * should get evicted. + */ + assert(ps == grow_edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + /* + * Technically the same as fallthrough at the time of this + * writing, but consistent with the error handling in the rest + * of the function. + */ + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + hpa_handle_ps_eviction(tsdn, shard, ps); + return NULL; + } malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } @@ -283,33 +436,25 @@ static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. 
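
Callers never invoke hpa_alloc() directly; they go through the pai_t vtable that hpa_shard_init() filled in. The unit-test changes later in this series exercise it essentially as follows (ALLOC_MAX being the shard's alloc_max):

	edata_t *edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE,
	    /* zero */ false);
	expect_ptr_not_null(edata, "Allocation of small max failed");
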
*/ if (alignment > PAGE || zero) { return NULL; } - if (size > shard->small_max && size < shard->large_min) { + if (size > shard->alloc_max) { return NULL; } - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata; - if (size <= shard->ps_alloc_max) { - edata = hpa_alloc_psset(tsdn, shard, size); - if (edata != NULL) { - emap_register_boundary(tsdn, shard->hpa->central.emap, - edata, SC_NSIZES, /* slab */ false); - } - } else { - edata = hpa_alloc_central(tsdn, shard, size, size); - } + edata_t *edata = hpa_alloc_psset(tsdn, shard, size); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + if (edata != NULL) { - emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); @@ -337,16 +482,6 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, } static void -hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { - hpa_t *hpa = shard->hpa; - - edata_arena_ind_set(edata, hpa->ind); - malloc_mutex_lock(tsdn, &hpa->mtx); - hpa_central_dalloc(tsdn, &hpa->central, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); -} - -static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { hpa_shard_t *shard = hpa_from_pai(self); @@ -361,56 +496,29 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); + edata_t *ps = edata_ps_get(edata); + /* Currently, all edatas come from pageslabs. */ + assert(ps != NULL); + emap_deregister_boundary(tsdn, shard->emap, edata); + malloc_mutex_lock(tsdn, &shard->mtx); /* - * There are two cases: - * - The psset field is NULL. In this case, the edata comes directly - * from the hpa_central_t and should be returned to it. - * - THe psset field is not NULL, in which case we return the edata to - * the appropriate slab (which may in turn cause it to become empty, - * triggering an eviction of the whole slab, which should then be - * returned to the hpa_central_t). + * Note that the shard mutex protects the edata hugeified field, too. + * Page slabs can move between pssets (and have their hugeified status + * change) in racy ways. */ - if (edata_ps_get(edata) != NULL) { - emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - - - if (evicted_ps != NULL) { - /* - * The deallocation caused a pageslab to become empty. - * Free it back to the centralized allocator. - */ - bool err = emap_register_boundary(tsdn, - shard->hpa->central.emap, evicted_ps, SC_NSIZES, - /* slab */ false); - /* - * Registration can only fail on OOM, but the boundary - * mappings should have been initialized during - * allocation. - */ - assert(!err); - edata_slab_set(evicted_ps, false); - edata_ps_set(evicted_ps, NULL); - - assert(edata_arena_ind_get(evicted_ps) == shard->ind); - hpa_dalloc_central(tsdn, shard, evicted_ps); - } - } else { - hpa_dalloc_central(tsdn, shard, edata); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + /* + * If a pageslab became empty because of the dalloc, it better have been + * the one we expected. 
+ */ + assert(evicted_ps == NULL || evicted_ps == ps); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (evicted_ps != NULL) { + hpa_handle_ps_eviction(tsdn, shard, evicted_ps); } } -static void -hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { - assert(bin_stats->npageslabs == 0); - assert(bin_stats->nactive == 0); - assert(bin_stats->ninactive == 0); -} - void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); @@ -418,6 +526,29 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs_huge == 0); + assert(bin_stats->nactive_huge == 0); + assert(bin_stats->ninactive_huge == 0); + assert(bin_stats->npageslabs_nonhuge == 0); + assert(bin_stats->nactive_nonhuge == 0); + assert(bin_stats->ninactive_nonhuge == 0); +} + +static void +hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { + edata_t edata = {0}; + malloc_mutex_assert_owner(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(psset, &edata, PAGE); + assert(psset_empty); + hpa_shard_assert_stats_empty(&psset->stats.full_slabs); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &psset->stats.nonfull_slabs[i]); + } +} + void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* @@ -427,17 +558,15 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { * 1-page allocation. */ if (config_debug) { - edata_t edata = {0}; malloc_mutex_lock(tsdn, &shard->mtx); - bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, - PAGE); + hpa_assert_empty(tsdn, shard, &shard->psset); malloc_mutex_unlock(tsdn, &shard->mtx); - assert(psset_empty); - hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - hpa_shard_assert_stats_empty( - &shard->psset.stats.nonfull_slabs[i]); - } + } + edata_t *ps; + while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) { + assert(edata_size_get(ps) == HUGEPAGE); + edata_list_inactive_remove(&shard->unused_slabs, ps); + pages_unmap(edata_base_get(ps), HUGEPAGE); } } @@ -462,21 +591,3 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } - -void -hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_prefork(tsdn, &hpa->grow_mtx); - malloc_mutex_prefork(tsdn, &hpa->mtx); -} - -void -hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_parent(tsdn, &hpa->mtx); -} - -void -hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_child(tsdn, &hpa->mtx); -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 74240c0..277b9e7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1802,31 +1802,19 @@ malloc_init_hard_a0_locked() { } a0 = arena_get(TSDN_NULL, 0, false); - if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_hpa && !hpa_supported()) { + malloc_printf(": HPA not supported in the current " + "configuration; %s.", + opt_abort_conf ? 
"aborting" : "disabling"); if (opt_abort_conf) { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; aborting."); + malloc_abort_invalid_conf(); } else { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; disabling."); opt_hpa = false; } } else if (opt_hpa) { - /* - * The global HPA uses the edata cache from a0, and so needs to - * be initialized specially, after a0 is. The arena init code - * handles this case specially, and does not turn on the HPA for - * a0 when opt_hpa is true. This lets us do global HPA - * initialization against a valid a0. - */ - if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, - &a0->pa_shard.edata_cache)) { - return true; - } - if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, - opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { + if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc, + opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, + opt_hpa_sec_max_bytes)) { return true; } } @@ -4346,9 +4334,6 @@ _malloc_prefork(void) } } } - if (i == 4 && opt_hpa) { - hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global); - } } prof_prefork1(tsd_tsdn(tsd)); @@ -4388,9 +4373,6 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4421,9 +4403,6 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index e5fcbb7..bc52ff4 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,17 +49,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { - ps_goal &= ~PAGE_MASK; - ps_alloc_max &= ~PAGE_MASK; - - if (ps_alloc_max > ps_goal) { - ps_alloc_max = ps_goal; - } - if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, - shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { +pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, + size_t sec_alloc_max, size_t sec_bytes_max) { + if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache, + shard->ind, alloc_max)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/src/pa_extra.c b/src/pa_extra.c index 2002418..0f488be 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -150,7 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, } if (shard->ever_used_hpa) { - hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); + hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } diff --git a/src/psset.c b/src/psset.c index c24266c..2ee683b 100644 --- a/src/psset.c +++ b/src/psset.c @@ -20,9 +20,13 @@ psset_init(psset_t *psset) { static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs += src->npageslabs; - dst->nactive += src->nactive; - dst->ninactive += src->ninactive; + 
dst->npageslabs_huge += src->npageslabs_huge; + dst->nactive_huge += src->nactive_huge; + dst->ninactive_huge += src->ninactive_huge; + + dst->npageslabs_nonhuge += src->npageslabs_nonhuge; + dst->nactive_nonhuge += src->nactive_nonhuge; + dst->ninactive_nonhuge += src->ninactive_nonhuge; } void @@ -45,29 +49,62 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. */ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { - size_t mul = inc ? (size_t)1 : (size_t)-1; +psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, + bool insert) { + size_t *npageslabs_dst = edata_hugeified_get(ps) + ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; + size_t *nactive_dst = edata_hugeified_get(ps) + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = edata_hugeified_get(ps) + ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; size_t npages = edata_size_get(ps) >> LG_PAGE; size_t ninactive = edata_nfree_get(ps); size_t nactive = npages - ninactive; - binstats->npageslabs += mul * 1; - binstats->nactive += mul * nactive; - binstats->ninactive += mul * ninactive; + + size_t mul = insert ? (size_t)1 : (size_t)-1; + *npageslabs_dst += mul * 1; + *nactive_dst += mul * nactive; + *ninactive_dst += mul * ninactive; +} + +static void +psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); +} + +static void +psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); +} + +/* + * We don't currently need an "activate" equivalent to this, since down the + * allocation pathways we don't do the optimization in which we change a slab + * without first removing it from a bin. + */ +static void +psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { + size_t *nactive_dst = huge + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = huge + ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; + + assert(*nactive_dst >= num); + *nactive_dst -= num; + *ninactive_dst += num; } static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ false); + psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } JEMALLOC_ALWAYS_INLINE void @@ -86,8 +123,7 @@ psset_insert(psset_t *psset, edata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. 
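
One detail of psset_bin_stats_insert_remove() above that is easy to trip over: mul is a size_t, and multiplying by (size_t)-1 is how a single unbranched line both adds (on insert) and subtracts (on remove). Unsigned arithmetic wraps modulo 2^N, so this is well defined. A self-contained demo of the idiom (not jemalloc code):

	#include <assert.h>
	#include <stddef.h>

	int
	main(void) {
		size_t counter = 10;
		size_t mul = (size_t)-1;	/* the "remove" direction */
		counter += mul * 3;		/* wraps around: net effect is -3 */
		assert(counter == 7);
		return 0;
	}
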
*/ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); return; } @@ -107,8 +143,7 @@ psset_remove(psset_t *psset, edata_t *ps) { size_t longest_free_range = edata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_remove(&psset->stats.full_slabs, ps); return; } @@ -121,6 +156,26 @@ psset_remove(psset_t *psset, edata_t *ps) { } } +void +psset_hugify(psset_t *psset, edata_t *ps) { + assert(!edata_hugeified_get(ps)); + psset_assert_ps_consistent(ps); + + size_t longest_free_range = edata_longest_free_range_get(ps); + psset_bin_stats_t *bin_stats; + if (longest_free_range == 0) { + bin_stats = &psset->stats.full_slabs; + } else { + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + bin_stats = &psset->stats.nonfull_slabs[pind]; + } + psset_bin_stats_remove(bin_stats, ps); + edata_hugeified_set(ps, true); + psset_bin_stats_insert(bin_stats, ps); +} + /* * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. @@ -225,8 +280,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { psset_insert(psset, ps); } @@ -258,8 +312,8 @@ edata_t * psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_ps_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; @@ -274,22 +328,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; fb_unset_range(ps_fb, ps_npages, begin, len); - if (ps_old_longest_free_range == 0) { - /* We were in the (imaginary) full bin; update stats for it. */ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ false); - } else { - /* - * The edata is still in the bin, need to update its - * contribution. - */ - psset->stats.nonfull_slabs[old_pind].nactive -= len; - psset->stats.nonfull_slabs[old_pind].ninactive += len; - } - /* - * Note that we want to do this after the stats updates, since if it was - * full it psset_bin_stats_adjust would have looked at the old version. - */ + + /* The pageslab is still in the bin; adjust its stats first. */ + psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 + ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); + psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len); + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ @@ -327,6 +371,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } + } else { + /* + * Otherwise, the bin was full, and we need to adjust the full + * bin stats. + */ + psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. 
*/ if (new_range_len == ps_npages) { diff --git a/src/stats.c b/src/stats.c index 4b40721..abe3ab1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -667,16 +667,27 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_row_t row; emitter_row_init(&row); - size_t npageslabs; - size_t nactive; - size_t ninactive; - - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", - i, &npageslabs, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", - i, &nactive, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", - i, &ninactive, size_t); + size_t npageslabs_huge; + size_t nactive_huge; + size_t ninactive_huge; + + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ninactive_nonhuge; + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge", + i, &ninactive_huge, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge", + i, &ninactive_nonhuge, size_t); size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -686,39 +697,62 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_table_printf(emitter, "HPA shard stats:\n" " In full slabs:\n" - " npageslabs: %zu\n" - " nactive: %zu\n" - " ninactive: %zu\n", - npageslabs, nactive, ninactive); + " npageslabs: %zu huge, %zu nonhuge\n" + " nactive: %zu huge, %zu nonhuge \n" + " ninactive: %zu huge, %zu nonhuge \n", + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_object_kv_begin(emitter, "full_slabs"); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); - emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, + &ninactive_huge); + emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, + &ninactive_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, npageslabs, NULL, right, 13, size) - COL_HDR(row, nactive, NULL, right, 13, size) - COL_HDR(row, ninactive, NULL, right, 13, size) + COL_HDR(row, npageslabs_huge, NULL, right, 16, size) + COL_HDR(row, nactive_huge, NULL, right, 16, size) + COL_HDR(row, ninactive_huge, NULL, right, 16, size) + COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) + COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) + COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { 
CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs", - i, j, &npageslabs, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge", + i, j, &npageslabs_huge, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge", + i, j, &nactive_huge, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge", + i, j, &ninactive_huge, size_t); + + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge", + i, j, &npageslabs_nonhuge, size_t); CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive", - i, j, &nactive, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge", + i, j, &nactive_nonhuge, size_t); CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive", - i, j, &ninactive, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge", + i, j, &ninactive_nonhuge, size_t); bool in_gap_prev = in_gap; - in_gap = (npageslabs == 0); + in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); if (in_gap_prev && !in_gap) { emitter_table_printf(emitter, " ---\n"); @@ -726,20 +760,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { col_size.size_val = sz_pind2sz(j); col_ind.size_val = j; - col_npageslabs.size_val = npageslabs; - col_nactive.size_val = nactive; - col_ninactive.size_val = ninactive; + col_npageslabs_huge.size_val = npageslabs_huge; + col_nactive_huge.size_val = nactive_huge; + col_ninactive_huge.size_val = ninactive_huge; + col_npageslabs_nonhuge.size_val = npageslabs_nonhuge; + col_nactive_nonhuge.size_val = nactive_nonhuge; + col_ninactive_nonhuge.size_val = ninactive_nonhuge; if (!in_gap) { emitter_table_row(emitter, &row); } emitter_json_object_begin(emitter); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, - &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, - &nactive); - emitter_json_kv(emitter, "ninactive", emitter_type_size, - &ninactive); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, + &ninactive_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, + &ninactive_huge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/hpa.c b/test/unit/hpa.c index b58dced..72a20c3 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -2,14 +2,9 @@ #include "jemalloc/internal/hpa.h" -#define HPA_IND 111 -#define SHARD_IND 222 +#define SHARD_IND 111 -#define PS_GOAL (128 * PAGE) -#define PS_ALLOC_MAX (64 * PAGE) - -#define HPA_SMALL_MAX (200 * PAGE) -#define HPA_LARGE_MIN (300 * PAGE) +#define ALLOC_MAX (HUGEPAGE / 4) typedef struct test_data_s test_data_t; struct test_data_s { @@ -18,50 +13,32 @@ struct test_data_s { * test_data_t and the hpa_shard_t; */ hpa_shard_t shard; - base_t *shard_base; + base_t *base; edata_cache_t shard_edata_cache; - hpa_t hpa; - base_t *hpa_base; - edata_cache_t hpa_edata_cache; - emap_t emap; }; static hpa_shard_t * create_test_data() { bool err; - base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks); - 
assert_ptr_not_null(shard_base, ""); - - base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND, - &ehooks_default_extent_hooks); - assert_ptr_not_null(hpa_base, ""); + assert_ptr_not_null(base, ""); test_data_t *test_data = malloc(sizeof(test_data_t)); assert_ptr_not_null(test_data, ""); - test_data->shard_base = shard_base; - test_data->hpa_base = hpa_base; - - err = edata_cache_init(&test_data->shard_edata_cache, shard_base); - assert_false(err, ""); - - err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); - assert_false(err, ""); + test_data->base = base; - err = emap_init(&test_data->emap, test_data->hpa_base, - /* zeroed */ false); + err = edata_cache_init(&test_data->shard_edata_cache, base); assert_false(err, ""); - err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, - &test_data->hpa_edata_cache); + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->hpa, - &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX, - HPA_SMALL_MAX, HPA_LARGE_MIN); + err = hpa_shard_init(&test_data->shard, &test_data->emap, + &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -70,12 +47,11 @@ create_test_data() { static void destroy_test_data(hpa_shard_t *shard) { test_data_t *test_data = (test_data_t *)shard; - base_delete(TSDN_NULL, test_data->shard_base); - base_delete(TSDN_NULL, test_data->hpa_base); + base_delete(TSDN_NULL, test_data->base); free(test_data); } -TEST_BEGIN(test_small_max_large_min) { +TEST_BEGIN(test_alloc_max) { test_skip_if(LG_SIZEOF_PTR != 3); hpa_shard_t *shard = create_test_data(); @@ -84,18 +60,11 @@ TEST_BEGIN(test_small_max_large_min) { edata_t *edata; /* Small max */ - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false); expect_ptr_not_null(edata, "Allocation of small max failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); - /* Large min */ - edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false); - expect_ptr_not_null(edata, "Allocation of large min failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false); - expect_ptr_null(edata, - "Allocation of smaller than large min succeeded"); - destroy_test_data(shard); } TEST_END @@ -178,26 +147,19 @@ TEST_BEGIN(test_stress) { mem_tree_new(&tree); for (size_t i = 0; i < 100 * 1000; i++) { - size_t operation = prng_range_zu(&prng_state, 4); - if (operation < 2) { + size_t operation = prng_range_zu(&prng_state, 2); + if (operation == 0) { /* Alloc */ if (nlive_edatas == nlive_edatas_max) { continue; } - size_t npages_min; - size_t npages_max; /* * We make sure to get an even balance of small and * large allocations. 
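
A note on the test constant: ALLOC_MAX is HUGEPAGE / 4, so with 2 MiB hugepages and 4 KiB pages (the usual x86-64 values, assumed here for illustration) the shard accepts requests up to 512 KiB, i.e. 128 pages; test_alloc_max expects an ALLOC_MAX request to succeed and ALLOC_MAX + PAGE to be refused, and the stress test's npages_max of ALLOC_MAX / PAGE works out to the same 128 pages.
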
*/ - if (operation == 0) { - npages_min = 1; - npages_max = HPA_SMALL_MAX / PAGE; - } else { - npages_min = HPA_LARGE_MIN / PAGE; - npages_max = HPA_LARGE_MIN / PAGE + 20; - } + size_t npages_min = 1; + size_t npages_max = ALLOC_MAX / PAGE; size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, @@ -260,6 +222,6 @@ main(void) { (void)mem_tree_reverse_iter; (void)mem_tree_destroy; return test_no_reentrancy( - test_small_max_large_min, + test_alloc_max, test_stress); } diff --git a/test/unit/psset.c b/test/unit/psset.c index e07bdc4..ea61ab9 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/psset.h" -#define PAGESLAB_PAGES 64 +#define PAGESLAB_PAGES (HUGEPAGE / PAGE) #define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) #define PAGESLAB_SN 123 #define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) @@ -296,22 +296,23 @@ TEST_END static void stats_expect_empty(psset_bin_stats_t *stats) { - assert_zu_eq(0, stats->npageslabs, + assert_zu_eq(0, stats->npageslabs_nonhuge, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" + expect_zu_eq(0, stats->nactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" + expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive ninactive"); } static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == PAGESLAB_PAGES) { - expect_zu_eq(1, psset->stats.full_slabs.npageslabs, + expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, + expect_zu_eq(PAGESLAB_PAGES, + psset->stats.full_slabs.nactive_nonhuge, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs.ninactive, + expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, "Should never have inactive pages in a full slab"); } else { stats_expect_empty(&psset->stats.full_slabs); @@ -325,13 +326,13 @@ stats_expect(psset_t *psset, size_t nactive) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { assert_zu_eq(1, - psset->stats.nonfull_slabs[i].npageslabs, + psset->stats.nonfull_slabs[i].npageslabs_nonhuge, "Should have found a slab"); expect_zu_eq(nactive, - psset->stats.nonfull_slabs[i].nactive, + psset->stats.nonfull_slabs[i].nactive_nonhuge, "Mismatch in active pages"); expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i].ninactive, + psset->stats.nonfull_slabs[i].ninactive_nonhuge, "Mismatch in inactive pages"); } else { stats_expect_empty(&psset->stats.nonfull_slabs[i]); -- cgit v0.12 From 4a15008cfbf414136f40a57fb1ceac80b22ea09f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 15:54:29 -0800 Subject: HPA unit test: skip if unsupported. Previously, we replicated the logic in hpa_supported in the test as well. 
--- test/unit/hpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 72a20c3..94efd4a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -52,7 +52,7 @@ destroy_test_data(hpa_shard_t *shard) { } TEST_BEGIN(test_alloc_max) { - test_skip_if(LG_SIZEOF_PTR != 3); + test_skip_if(!hpa_supported()); hpa_shard_t *shard = create_test_data(); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); @@ -128,7 +128,7 @@ node_remove(mem_tree_t *tree, edata_t *edata) { } TEST_BEGIN(test_stress) { - test_skip_if(LG_SIZEOF_PTR != 3); + test_skip_if(!hpa_supported()); hpa_shard_t *shard = create_test_data(); -- cgit v0.12 From ca30b5db2bbf51b9c4d5aefa2ec87490b7f93395 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 17 Nov 2020 16:32:45 -0800 Subject: Introduce hpdata_t. Using an edata_t both for hugepages and the allocations within those hugepages was convenient at first, but has outlived its usefulness. Representing hugepages explicitly, with their own data structure, will make future development easier. --- Makefile.in | 1 + include/jemalloc/internal/edata.h | 109 ++---------- include/jemalloc/internal/hpa.h | 16 +- include/jemalloc/internal/hpdata.h | 124 +++++++++++++ include/jemalloc/internal/pages.h | 14 ++ include/jemalloc/internal/psset.h | 17 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/edata.c | 1 - src/hpa.c | 178 ++++++++----------- src/hpdata.c | 18 ++ src/pa.c | 4 +- src/psset.c | 134 +++++++------- test/unit/hpa.c | 3 +- test/unit/psset.c | 196 +++++++++------------ 17 files changed, 416 insertions(+), 407 deletions(-) create mode 100644 include/jemalloc/internal/hpdata.h create mode 100644 src/hpdata.c diff --git a/Makefile.in b/Makefile.in index eae3065..f263fc3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -122,6 +122,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ + $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 465c962..c048288 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" @@ -71,7 +72,6 @@ struct edata_map_info_s { typedef struct edata_s edata_t; typedef ph(edata_t) edata_avail_t; typedef ph(edata_t) edata_heap_t; -typedef ph(edata_t) edata_age_heap_t; struct edata_s { /* * Bitfield containing several fields: @@ -194,41 +194,13 @@ struct edata_s { }; /* - * In some context-specific sense, the age of an active extent. Each - * context can pick a specific meaning, and share the definition of the - * edata_age_heap_t below. + * If this edata is a user allocation from an HPA, it comes out of some + * pageslab (we don't yet support huegpage allocations that don't fit + * into pageslabs). This tracks it. */ - uint64_t age; - union { - /* - * We could steal a low bit from these fields to indicate what - * sort of "thing" this is (a page slab, an object within a page - * slab, or a non-pageslab range). 
We don't do this yet, but it - * would enable some extra asserts. - */ - - /* - * If this edata is a user allocation from an HPA, it comes out - * of some pageslab (we don't yet support huegpage allocations - * that don't fit into pageslabs). This tracks it. - */ - edata_t *ps; - /* - * If this edata *is* a pageslab, then we cache some useful - * information about its associated bitmap. - */ - struct { - /* - * The longest free range a pageslab contains determines - * the heap it lives in. If we know that it didn't - * change after an operation, we can avoid moving it - * between heaps. - */ - uint32_t longest_free_range; - /* Whether or not the slab is backed by a hugepage. */ - bool hugeified; - }; - }; + hpdata_t *e_ps; + /* Extra field reserved for HPA. */ + void *e_reserved; union { /* @@ -331,11 +303,6 @@ edata_pai_get(const edata_t *edata) { } static inline bool -edata_hugeified_get(const edata_t *edata) { - return edata->hugeified; -} - -static inline bool edata_slab_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> EDATA_BITS_SLAB_SHIFT); @@ -377,21 +344,10 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } -static inline uint64_t -edata_age_get(const edata_t *edata) { - return edata->age; -} - -static inline edata_t * +static inline hpdata_t * edata_ps_get(const edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - return edata->ps; -} - -static inline uint32_t -edata_longest_free_range_get(const edata_t *edata) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - return edata->longest_free_range; + return edata->e_ps; } static inline void * @@ -477,21 +433,9 @@ edata_bsize_set(edata_t *edata, size_t bsize) { } static inline void -edata_age_set(edata_t *edata, uint64_t age) { - edata->age = age; -} - -static inline void -edata_ps_set(edata_t *edata, edata_t *ps) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); - edata->ps = ps; -} - -static inline void -edata_longest_free_range_set(edata_t *edata, uint32_t longest_free_range) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA - || longest_free_range == 0); - edata->longest_free_range = longest_free_range; +edata_ps_set(edata_t *edata, hpdata_t *ps) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + edata->e_ps = ps; } static inline void @@ -567,11 +511,6 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) { } static inline void -edata_hugeified_set(edata_t *edata, bool hugeified) { - edata->hugeified = hugeified; -} - -static inline void edata_slab_set(edata_t *edata, bool slab) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); @@ -633,9 +572,6 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if (config_prof) { edata_prof_tctx_set(edata, NULL); } - edata_age_set(edata, 0); - edata_ps_set(edata, NULL); - edata_longest_free_range_set(edata, 0); } static inline void @@ -649,15 +585,12 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_age_set(edata, 0); /* * This isn't strictly true, but base allocated extents never get * deallocated and can't be looked up in the emap, but no sense in * wasting a state bit to encode this fact. 
*/ edata_pai_set(edata, EXTENT_PAI_PAC); - edata_ps_set(edata, NULL); - edata_longest_free_range_set(edata, 0); } static inline int @@ -718,25 +651,7 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } -static inline int -edata_age_comp(const edata_t *a, const edata_t *b) { - uint64_t a_age = edata_age_get(a); - uint64_t b_age = edata_age_get(b); - - /* - * Equal ages are possible in certain race conditions, like two distinct - * threads simultaneously allocating a new fresh slab without holding a - * bin lock. - */ - int ret = (a_age > b_age) - (a_age < b_age); - if (ret != 0) { - return ret; - } - return edata_snad_comp(a, b); -} - ph_proto(, edata_avail_, edata_avail_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) -ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1c4585d..edb3617 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -21,6 +21,8 @@ struct hpa_shard_s { pai_t pai; malloc_mutex_t grow_mtx; malloc_mutex_t mtx; + /* The base metadata allocator. */ + base_t *base; /* * This edata cache is the one we use when allocating a small extent * from a pageslab. The pageslab itself comes from the centralized @@ -45,7 +47,14 @@ struct hpa_shard_s { * * Guarded by grow_mtx. */ - edata_list_inactive_t unused_slabs; + hpdata_list_t unused_slabs; + + /* + * How many grow operations have occurred. + * + * Guarded by grow_mtx. + */ + uint64_t age_counter; /* * Either NULL (if empty), or some integer multiple of a @@ -54,7 +63,8 @@ struct hpa_shard_s { * * Guarded by grow_mtx. */ - edata_t *eden; + void *eden; + size_t eden_len; /* The arena ind we're associated with. */ unsigned ind; @@ -67,7 +77,7 @@ struct hpa_shard_s { * just that it can function properly given the system it's running on. */ bool hpa_supported(); -bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, +bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h new file mode 100644 index 0000000..c4bf6ef --- /dev/null +++ b/include/jemalloc/internal/hpdata.h @@ -0,0 +1,124 @@ +#ifndef JEMALLOC_INTERNAL_HPDATA_H +#define JEMALLOC_INTERNAL_HPDATA_H + +#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/typed_list.h" + +/* + * The metadata representation we use for extents in hugepages. While the PAC + * uses the edata_t to represent both active and inactive extents, the HP only + * uses the edata_t for active ones; instead, inactive extent state is tracked + * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned + * region of virtual address space. + * + * An hpdata need not be "truly" backed by a hugepage (which is not necessarily + * an observable property of any given region of address space). It's just + * hugepage-sized and hugepage-aligned; it's *potentially* huge. + */ +typedef struct hpdata_s hpdata_t; +struct hpdata_s { + /* + * We likewise follow the edata convention of mangling names and forcing + * the use of accessors -- this lets us add some consistency checks on + * access. + */ + + /* + * The address of the hugepage in question. 
This can't be named h_addr, + * since that conflicts with a macro defined in Windows headers. + */ + void *h_address; + /* Its age (measured in psset operations). */ + uint64_t h_age; + /* Whether or not we think the hugepage is mapped that way by the OS. */ + bool h_huge; + union { + /* When nonempty, used by the psset bins. */ + phn(hpdata_t) ph_link; + /* + * When empty (or not corresponding to any hugepage), list + * linkage. + */ + ql_elm(hpdata_t) ql_link; + }; + + /* Number of currently free pages (regardless of contiguity). */ + size_t h_nfree; + /* The length of the largest contiguous sequence of inactive pages. */ + size_t h_longest_free_range; + + /* A bitmap with bits set in the active pages. */ + fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; +}; + +static inline void * +hpdata_addr_get(const hpdata_t *hpdata) { + return hpdata->h_address; +} + +static inline void +hpdata_addr_set(hpdata_t *hpdata, void *addr) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + hpdata->h_address = addr; +} + +static inline uint64_t +hpdata_age_get(const hpdata_t *hpdata) { + return hpdata->h_age; +} + +static inline void +hpdata_age_set(hpdata_t *hpdata, uint64_t age) { + hpdata->h_age = age; +} + +static inline bool +hpdata_huge_get(const hpdata_t *hpdata) { + return hpdata->h_huge; +} + +static inline void +hpdata_huge_set(hpdata_t *hpdata, bool huge) { + hpdata->h_huge = huge; +} + +static inline size_t +hpdata_nfree_get(const hpdata_t *hpdata) { + return hpdata->h_nfree; +} + +static inline void +hpdata_nfree_set(hpdata_t *hpdata, size_t nfree) { + assert(nfree <= HUGEPAGE_PAGES); + hpdata->h_nfree = nfree; +} + +static inline size_t +hpdata_longest_free_range_get(const hpdata_t *hpdata) { + return hpdata->h_longest_free_range; +} + +static inline void +hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { + assert(longest_free_range <= HUGEPAGE_PAGES); + hpdata->h_longest_free_range = longest_free_range; +} + +static inline void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + hpdata_addr_set(hpdata, addr); + hpdata_age_set(hpdata, age); + hpdata_huge_set(hpdata, false); + hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +} + +TYPED_LIST(hpdata_list, hpdata_t, ql_link) + +typedef ph(hpdata_t) hpdata_age_heap_t; +ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); + +#endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index cfaa0fc..035364e 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -17,6 +17,20 @@ /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) #define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) + +#if LG_HUGEPAGE != 0 +# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) +#else +/* + * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If + * we can't autodetect the hugepage size, it gets treated as 0, in which case + * we'll trigger a compiler error in those arrays. Avoid this case by ensuring + * that this value is at least 1. (We won't ever run in this degraded state; + * hpa_supported() returns false in this case. + */ +# define HUGEPAGE_PAGES 1 +#endif + /* Return the huge page base address for the huge page containing address a. 
*/ #define HUGEPAGE_ADDR2BASE(a) \ ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3c9f23b..01b4e80 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PSSET_H #define JEMALLOC_INTERNAL_PSSET_H +#include "jemalloc/internal/hpdata.h" + /* * A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains * a collection of page-slabs (the intent being that they are backed by @@ -51,21 +53,18 @@ struct psset_s { * The pageslabs, quantized by the size class of the largest contiguous * free run of pages in a pageslab. */ - edata_age_heap_t pageslabs[PSSET_NPSIZES]; + hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; - - /* How many alloc_new calls have happened? */ - uint64_t age_counter; }; void psset_init(psset_t *psset); void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); -void psset_insert(psset_t *psset, edata_t *ps); -void psset_remove(psset_t *psset, edata_t *ps); +void psset_insert(psset_t *psset, hpdata_t *ps); +void psset_remove(psset_t *psset, hpdata_t *ps); -void psset_hugify(psset_t *psset, edata_t *ps); +void psset_hugify(psset_t *psset, hpdata_t *ps); /* * Tries to obtain a chunk from an existing pageslab already in the set. @@ -78,7 +77,7 @@ bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); * to the psset and allocate an extent from within it. The passed-in pageslab * must be at least as big as size. */ -void psset_alloc_new(psset_t *psset, edata_t *ps, +void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size); /* @@ -89,6 +88,6 @@ void psset_alloc_new(psset_t *psset, edata_t *ps, * result must be checked and deallocated to the central HPA. Otherwise returns * NULL. 
*/ -edata_t *psset_dalloc(psset_t *psset, edata_t *edata); +hpdata_t *psset_dalloc(psset_t *psset, edata_t *edata); #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 6c4e7fd..531dd9a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 84ff574..f031fb1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 07fbe21..bc64de5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 84ff574..f031fb1 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/src/edata.c b/src/edata.c index a659731..23523dd 100644 --- a/src/edata.c +++ b/src/edata.c @@ -4,4 +4,3 @@ ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) -ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) diff --git a/src/hpa.c b/src/hpa.c index ca75628..9a190c8 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -33,22 +33,22 @@ hpa_supported() { * We fundamentally rely on a address-space-hungry growth strategy for * hugepages. */ - if (LG_SIZEOF_PTR == 2) { + if (LG_SIZEOF_PTR != 3) { return false; } /* - * We use the edata bitmap; it needs to have at least as many bits as a - * hugepage has pages. + * If we couldn't detect the value of HUGEPAGE, HUGEPAGE_PAGES becomes + * this sentinel value -- see the comment in pages.h. */ - if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) { + if (HUGEPAGE_PAGES == 1) { return false; } return true; } bool -hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, - unsigned ind, size_t alloc_max) { +hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, + edata_cache_t *edata_cache, unsigned ind, size_t alloc_max) { /* malloc_conf processing should have filtered out these cases. 
*/ assert(hpa_supported()); bool err; @@ -64,11 +64,14 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, } assert(edata_cache != NULL); + shard->base = base; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->alloc_max = alloc_max; - edata_list_inactive_init(&shard->unused_slabs); + hpdata_list_init(&shard->unused_slabs); + shard->age_counter = 0; shard->eden = NULL; + shard->eden_len = 0; shard->ind = ind; shard->emap = emap; @@ -104,22 +107,27 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_unlock(tsdn, &shard->mtx); } +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { + return (hpdata_t *)base_alloc(tsdn, shard->base, sizeof(hpdata_t), + CACHELINE); +} + static bool -hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) { +hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { /* * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. */ - return !edata_hugeified_get(ps) - && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; + return !hpdata_huge_get(ps) + && hpdata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; } /* Returns true on error. */ static void -hpa_hugify(edata_t *ps) { - assert(edata_size_get(ps) == HUGEPAGE); - assert(edata_hugeified_get(ps)); - bool err = pages_huge(edata_base_get(ps), HUGEPAGE); +hpa_hugify(hpdata_t *ps) { + assert(hpdata_huge_get(ps)); + bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); /* * Eat the error; even if the hugeification failed, it's still safe to * pretend it didn't (and would require extraordinary measures to @@ -129,30 +137,36 @@ hpa_hugify(edata_t *ps) { } static void -hpa_dehugify(edata_t *ps) { +hpa_dehugify(hpdata_t *ps) { /* Purge, then dehugify while unbacked. */ - pages_purge_forced(edata_addr_get(ps), HUGEPAGE); - pages_nohuge(edata_addr_get(ps), HUGEPAGE); - edata_hugeified_set(ps, false); + pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); + pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); + hpdata_huge_set(ps, false); } -static edata_t * +static hpdata_t * hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - edata_t *ps = NULL; + hpdata_t *ps = NULL; /* Is there address space waiting for reuse? */ malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - ps = edata_list_inactive_first(&shard->unused_slabs); + ps = hpdata_list_first(&shard->unused_slabs); if (ps != NULL) { - edata_list_inactive_remove(&shard->unused_slabs, ps); + hpdata_list_remove(&shard->unused_slabs, ps); + hpdata_age_set(ps, shard->age_counter++); return ps; } /* Is eden a perfect fit? */ - if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) { - ps = shard->eden; + if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, shard); + if (ps == NULL) { + return NULL; + } + hpdata_init(ps, shard->eden, shard->age_counter++); shard->eden = NULL; + shard->eden_len = 0; return ps; } @@ -173,78 +187,32 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { if (new_eden == NULL) { return NULL; } - malloc_mutex_lock(tsdn, &shard->mtx); - /* Allocate ps edata, bailing if we fail. */ - ps = edata_cache_small_get(tsdn, &shard->ecs); + ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); pages_unmap(new_eden, HPA_EDEN_SIZE); return NULL; } - /* Allocate eden edata, bailing if we fail. 
*/ - shard->eden = edata_cache_small_get(tsdn, &shard->ecs); - if (shard->eden == NULL) { - edata_cache_small_put(tsdn, &shard->ecs, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - pages_unmap(new_eden, HPA_EDEN_SIZE); - return NULL; - } - /* Success. */ - malloc_mutex_unlock(tsdn, &shard->mtx); - - /* - * Note that the values here don't really make sense (e.g. eden - * is actually zeroed). But we don't use the slab metadata in - * determining subsequent allocation metadata (e.g. zero - * tracking should be done at the per-page level, not at the - * level of the hugepage). It's just a convenient data - * structure that contains much of the helpers we need (defined - * lists, a bitmap, an address field, etc.). Eventually, we'll - * have a "real" representation of a hugepage that's unconnected - * to the edata_ts it will serve allocations into. - */ - edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - /* is_head */ true); - edata_hugeified_set(shard->eden, false); + shard->eden = new_eden; + shard->eden_len = HPA_EDEN_SIZE; } else { /* Eden is already nonempty; only need an edata for ps. */ - malloc_mutex_lock(tsdn, &shard->mtx); - ps = edata_cache_small_get(tsdn, &shard->ecs); - malloc_mutex_unlock(tsdn, &shard->mtx); + ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { return NULL; } } - /* - * We should have dropped mtx since we're not touching ecs any more, but - * we should continue to hold the grow mutex, since we're about to touch - * eden. - */ - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - + assert(ps != NULL); assert(shard->eden != NULL); - assert(edata_size_get(shard->eden) > HUGEPAGE); - assert(edata_size_get(shard->eden) % HUGEPAGE == 0); - assert(edata_addr_get(shard->eden) - == HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden))); - malloc_mutex_lock(tsdn, &shard->mtx); - ps = edata_cache_small_get(tsdn, &shard->ecs); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (ps == NULL) { - return NULL; - } - edata_init(ps, edata_arena_ind_get(shard->eden), - edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false, - /* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - /* is_head */ true); - edata_hugeified_set(ps, false); - edata_addr_set(shard->eden, edata_past_get(ps)); - edata_size_set(shard->eden, - edata_size_get(shard->eden) - HUGEPAGE); + assert(shard->eden_len > HUGEPAGE); + assert(shard->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(shard->eden) == shard->eden); + + hpdata_init(ps, shard->eden, shard->age_counter++); + + char *eden_char = (char *)shard->eden; + eden_char += HUGEPAGE; + shard->eden = (void *)eden_char; + shard->eden_len -= HUGEPAGE; return ps; } @@ -255,7 +223,7 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { * their address space in a list outside the psset. */ static void -hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { +hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* * We do relatively expensive system calls. The ps was evicted, so no * one should touch it while we're also touching it. 
@@ -263,9 +231,6 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - assert(edata_size_get(ps) == HUGEPAGE); - assert(HUGEPAGE_ADDR2BASE(edata_addr_get(ps)) == edata_addr_get(ps)); - /* * We do this unconditionally, even for pages which were not originally * hugeified; it has the same effect. @@ -273,7 +238,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { hpa_dehugify(ps); malloc_mutex_lock(tsdn, &shard->grow_mtx); - edata_list_inactive_prepend(&shard->unused_slabs, ps); + hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -307,7 +272,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - edata_t *ps = psset_dalloc(&shard->psset, edata); + hpdata_t *ps = psset_dalloc(&shard->psset, edata); /* * The pageslab was nonempty before we started; it * should still be nonempty now, and so shouldn't get @@ -320,7 +285,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); assert(ps != NULL); bool hugify = hpa_should_hugify(shard, ps); if (hugify) { @@ -378,16 +343,11 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - edata_t *grow_edata = hpa_grow(tsdn, shard); - if (grow_edata == NULL) { + hpdata_t *grow_ps = hpa_grow(tsdn, shard); + if (grow_ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } - assert(edata_arena_ind_get(grow_edata) == shard->ind); - - edata_slab_set(grow_edata, true); - fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; - fb_init(fb, HUGEPAGE / PAGE); /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); @@ -395,18 +355,19 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (edata == NULL) { malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + hpa_handle_ps_eviction(tsdn, shard, grow_ps); return NULL; } - psset_alloc_new(&shard->psset, grow_edata, edata, size); + psset_alloc_new(&shard->psset, grow_ps, edata, size); err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - edata_t *ps = psset_dalloc(&shard->psset, edata); + hpdata_t *ps = psset_dalloc(&shard->psset, edata); /* * The pageslab was empty except for the new allocation; it * should get evicted. */ - assert(ps == grow_edata); + assert(ps == grow_ps); edata_cache_small_put(tsdn, &shard->ecs, edata); /* * Technically the same as fallthrough at the time of this @@ -496,7 +457,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); /* Currently, all edatas come from pageslabs. */ assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); @@ -506,7 +467,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * Page slabs can move between pssets (and have their hugeified status * change) in racy ways. 
*/ - edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); /* * If a pageslab became empty because of the dalloc, it better have been * the one we expected. @@ -562,11 +523,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_assert_empty(tsdn, shard, &shard->psset); malloc_mutex_unlock(tsdn, &shard->mtx); } - edata_t *ps; - while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) { - assert(edata_size_get(ps) == HUGEPAGE); - edata_list_inactive_remove(&shard->unused_slabs, ps); - pages_unmap(edata_base_get(ps), HUGEPAGE); + hpdata_t *ps; + while ((ps = hpdata_list_first(&shard->unused_slabs)) != NULL) { + hpdata_list_remove(&shard->unused_slabs, ps); + pages_unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpdata.c b/src/hpdata.c new file mode 100644 index 0000000..bbe3acc --- /dev/null +++ b/src/hpdata.c @@ -0,0 +1,18 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpdata.h" + +static int +hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { + uint64_t a_age = hpdata_age_get(a); + uint64_t b_age = hpdata_age_get(b); + /* + * hpdata ages are operation counts in the psset; no two should be the + * same. + */ + assert(a_age != b_age); + return (a_age > b_age) - (a_age < b_age); +} + +ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) diff --git a/src/pa.c b/src/pa.c index bc52ff4..da64b82 100644 --- a/src/pa.c +++ b/src/pa.c @@ -51,8 +51,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { - if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache, - shard->ind, alloc_max)) { + if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, + &shard->edata_cache, shard->ind, alloc_max)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/src/psset.c b/src/psset.c index 2ee683b..cebc1ce 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,11 +11,10 @@ static const bitmap_info_t psset_bitmap_info = void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - edata_age_heap_new(&psset->pageslabs[i]); + hpdata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); - psset->age_counter = 0; } static void @@ -49,18 +48,17 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. */ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, +psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { - size_t *npageslabs_dst = edata_hugeified_get(ps) + size_t *npageslabs_dst = hpdata_huge_get(ps) ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; - size_t *nactive_dst = edata_hugeified_get(ps) + size_t *nactive_dst = hpdata_huge_get(ps) ? &binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = edata_hugeified_get(ps) + size_t *ninactive_dst = hpdata_huge_get(ps) ? 
&binstats->ninactive_huge : &binstats->ninactive_nonhuge; - size_t npages = edata_size_get(ps) >> LG_PAGE; - size_t ninactive = edata_nfree_get(ps); - size_t nactive = npages - ninactive; + size_t ninactive = hpdata_nfree_get(ps); + size_t nactive = HUGEPAGE_PAGES - ninactive; size_t mul = insert ? (size_t)1 : (size_t)-1; *npageslabs_dst += mul * 1; @@ -69,12 +67,12 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, } static void -psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) { +psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); } static void -psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) { +psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); } @@ -96,27 +94,27 @@ psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { } static void -psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_age_heap_remove(&psset->pageslabs[pind], ps); +psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { + hpdata_age_heap_remove(&psset->pageslabs[pind], ps); psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); } static void -psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_age_heap_insert(&psset->pageslabs[pind], ps); +psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { + hpdata_age_heap_insert(&psset->pageslabs[pind], ps); psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } JEMALLOC_ALWAYS_INLINE void -psset_assert_ps_consistent(edata_t *ps) { - assert(fb_urange_longest(edata_slab_data_get(ps)->bitmap, - edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); +psset_assert_ps_consistent(hpdata_t *ps) { + assert(fb_urange_longest(ps->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(ps)); } void -psset_insert(psset_t *psset, edata_t *ps) { +psset_insert(psset_t *psset, hpdata_t *ps) { psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { /* @@ -131,16 +129,16 @@ psset_insert(psset_t *psset, edata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - psset_edata_heap_insert(psset, pind, ps); + psset_hpdata_heap_insert(psset, pind, ps); } void -psset_remove(psset_t *psset, edata_t *ps) { +psset_remove(psset_t *psset, hpdata_t *ps) { psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); @@ -150,18 +148,18 @@ psset_remove(psset_t *psset, edata_t *ps) { pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_edata_heap_remove(psset, pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + psset_hpdata_heap_remove(psset, pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); } } void -psset_hugify(psset_t *psset, edata_t *ps) { - assert(!edata_hugeified_get(ps)); 
+psset_hugify(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_huge_get(ps)); psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); psset_bin_stats_t *bin_stats; if (longest_free_range == 0) { bin_stats = &psset->stats.full_slabs; @@ -172,7 +170,7 @@ psset_hugify(psset_t *psset, edata_t *ps) { bin_stats = &psset->stats.nonfull_slabs[pind]; } psset_bin_stats_remove(bin_stats, ps); - edata_hugeified_set(ps, true); + hpdata_huge_set(ps, true); psset_bin_stats_insert(bin_stats, ps); } @@ -180,7 +178,7 @@ psset_hugify(psset_t *psset, edata_t *ps) { * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. */ -static edata_t * +static hpdata_t * psset_recycle_extract(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, @@ -188,13 +186,13 @@ psset_recycle_extract(psset_t *psset, size_t size) { if (pind == PSSET_NPSIZES) { return NULL; } - edata_t *ps = edata_age_heap_first(&psset->pageslabs[pind]); + hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } - psset_edata_heap_remove(psset, pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + psset_hpdata_heap_remove(psset, pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } @@ -207,7 +205,7 @@ psset_recycle_extract(psset_t *psset, size_t size) { * edata with a range in the pageslab, and puts ps back in the set. */ static void -psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, +psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { size_t start = 0; /* @@ -217,15 +215,14 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t begin = 0; size_t len = 0; - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + fb_group_t *ps_fb = ps->active_pages; size_t npages = size >> LG_PAGE; - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t largest_unchosen_range = 0; while (true) { - bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, - &len); + bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, + &begin, &len); /* * A precondition to this function is that ps must be able to * serve the allocation. @@ -245,14 +242,14 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } start = begin + len; } - uintptr_t addr = (uintptr_t)edata_base_get(ps) + begin * PAGE; + uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - fb_set_range(ps_fb, ps_npages, begin, npages); - edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) - npages)); + fb_set_range(ps_fb, HUGEPAGE_PAGES, begin, npages); + hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) - npages)); /* The pageslab isn't in a bin, so no bin stats need to change. */ /* @@ -267,8 +264,8 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, * this check in the case where we're allocating from some smaller run. 
*/ start = begin + npages; - while (start < ps_npages) { - bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, &begin, &len); if (!found) { break; @@ -278,7 +275,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } start = begin + len; } - edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); + hpdata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { @@ -288,7 +285,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - edata_t *ps = psset_recycle_extract(psset, size); + hpdata_t *ps = psset_recycle_extract(psset, size); if (ps == NULL) { return true; } @@ -297,48 +294,43 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { } void -psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; - assert(fb_empty(ps_fb, ps_npages)); - assert(ps_npages >= (size >> LG_PAGE)); - edata_nfree_set(ps, (uint32_t)ps_npages); - edata_age_set(ps, psset->age_counter); - psset->age_counter++; +psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { + fb_group_t *ps_fb = ps->active_pages; + assert(fb_empty(ps_fb, HUGEPAGE_PAGES)); + assert(hpdata_nfree_get(ps) == HUGEPAGE_PAGES); psset_ps_alloc_insert(psset, ps, r_edata, size); } -edata_t * +hpdata_t * psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_ps_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; - size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + fb_group_t *ps_fb = ps->active_pages; + size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; if (ps_old_longest_free_range != 0) { old_pind = sz_psz2ind(sz_psz_quantize_floor( ps_old_longest_free_range << LG_PAGE)); } - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t begin = - ((uintptr_t)edata_base_get(edata) - (uintptr_t)edata_base_get(ps)) + ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - fb_unset_range(ps_fb, ps_npages, begin, len); + fb_unset_range(ps_fb, HUGEPAGE_PAGES, begin, len); /* The pageslab is still in the bin; adjust its stats first. */ psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); - psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len); + psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); + hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) + len)); /* We might have just created a new, larger range. 
*/ - size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); - size_t new_end = fb_ffs(ps_fb, ps_npages, begin + len - 1); + size_t new_begin = (size_t)(fb_fls(ps_fb, HUGEPAGE_PAGES, begin) + 1); + size_t new_end = fb_ffs(ps_fb, HUGEPAGE_PAGES, begin + len - 1); size_t new_range_len = new_end - new_begin; /* * If the new free range is no longer than the previous longest one, @@ -352,7 +344,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * Otherwise, it might need to get evicted from the set, or change its * bin. */ - edata_longest_free_range_set(ps, (uint32_t)new_range_len); + hpdata_longest_free_range_set(ps, (uint32_t)new_range_len); /* * If it was previously non-full, then it's in some (possibly now * incorrect) bin already; remove it. @@ -366,8 +358,8 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * and the issue becomes moot). */ if (ps_old_longest_free_range > 0) { - psset_edata_heap_remove(psset, old_pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[old_pind])) { + psset_hpdata_heap_remove(psset, old_pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } @@ -379,16 +371,16 @@ psset_dalloc(psset_t *psset, edata_t *edata) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. */ - if (new_range_len == ps_npages) { + if (new_range_len == HUGEPAGE_PAGES) { return ps; } /* Otherwise, it gets reinserted. */ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( new_range_len << LG_PAGE)); - if (edata_age_heap_empty(&psset->pageslabs[new_pind])) { + if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } - psset_edata_heap_insert(psset, new_pind, ps); + psset_hpdata_heap_insert(psset, new_pind, ps); return NULL; } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 94efd4a..90ec89e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -38,7 +38,8 @@ create_test_data() { assert_false(err, ""); err = hpa_shard_init(&test_data->shard, &test_data->emap, - &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX); + test_data->base, &test_data->shard_edata_cache, SHARD_IND, + ALLOC_MAX); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/psset.c b/test/unit/psset.c index ea61ab9..811c7be 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -2,10 +2,8 @@ #include "jemalloc/internal/psset.h" -#define PAGESLAB_PAGES (HUGEPAGE / PAGE) -#define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) -#define PAGESLAB_SN 123 -#define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) +#define PAGESLAB_ADDR ((void *)(1234 * HUGEPAGE)) +#define PAGESLAB_AGE 5678 #define ALLOC_ARENA_IND 111 #define ALLOC_ESN 222 @@ -42,14 +40,10 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { TEST_BEGIN(test_empty) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc; + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + edata_t alloc; edata_init_test(&alloc); psset_t psset; @@ -63,27 +57,24 @@ TEST_END TEST_BEGIN(test_fill) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, 
PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { edata_t *edata = &alloc[i]; edata_expect(edata, i, 1); } @@ -98,30 +89,26 @@ TEST_END TEST_BEGIN(test_reuse) { bool err; - edata_t *ps; + hpdata_t *ps; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } /* Free odd indices. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i ++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i ++) { if (i % 2 == 0) { continue; } @@ -129,7 +116,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* Realloc into them. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 2 == 0) { continue; } @@ -138,7 +125,7 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[i], i, 1); } /* Now, free the pages at indices 0 or 1 mod 2. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 > 1) { continue; } @@ -146,7 +133,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* And realloc 2-page allocations into them. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 != 0) { continue; } @@ -155,7 +142,7 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[i], i, 2); } /* Free all the 2-page allocations. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 != 0) { continue; } @@ -175,13 +162,13 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[index_of_3], index_of_3, 3); /* Free up a 4-page hole at the end. */ - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 2]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); expect_ptr_null(ps, "Nonempty pageslab evicted"); /* Make sure we can satisfy an allocation at the very end of a slab. 
*/ - size_t index_of_4 = PAGESLAB_PAGES - 4; + size_t index_of_4 = HUGEPAGE_PAGES - 4; ps = psset_dalloc(&psset, &alloc[index_of_4]); expect_ptr_null(ps, "Nonempty pageslab evicted"); err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); @@ -192,33 +179,31 @@ TEST_END TEST_BEGIN(test_evict) { bool err; - edata_t *ps; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; - - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t *ps; + + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; + psset_t psset; psset_init(&psset); /* Alloc the whole slab. */ edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Unxpected allocation failure"); } /* Dealloc the whole slab, going forwards. */ - for (size_t i = 0; i < PAGESLAB_PAGES - 1; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES - 1; i++) { ps = psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); err = psset_alloc_reuse(&psset, &alloc[0], PAGE); @@ -228,20 +213,15 @@ TEST_END TEST_BEGIN(test_multi_pageslab) { bool err; - edata_t *ps; - edata_t pageslab[2]; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[2][PAGESLAB_PAGES]; - - edata_init(&pageslab[0], /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); - edata_init(&pageslab[1], /* arena_ind */ 0, - (void *)((uintptr_t)PAGESLAB_ADDR + PAGESLAB_SIZE), PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t *ps; + + hpdata_t pageslab[2]; + hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); + hpdata_init(&pageslab[1], + (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), + PAGESLAB_AGE + 1); + + edata_t alloc[2][HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); @@ -254,7 +234,7 @@ TEST_BEGIN(test_multi_pageslab) { /* Fill them both up; make sure we do so in first-fit order. 
*/ for (size_t i = 0; i < 2; i++) { - for (size_t j = 1; j < PAGESLAB_PAGES; j++) { + for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); expect_false(err, @@ -306,10 +286,10 @@ stats_expect_empty(psset_bin_stats_t *stats) { static void stats_expect(psset_t *psset, size_t nactive) { - if (nactive == PAGESLAB_PAGES) { + if (nactive == HUGEPAGE_PAGES) { expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, + expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs.nactive_nonhuge, "Should have exactly filled the bin"); expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, @@ -317,9 +297,9 @@ stats_expect(psset_t *psset, size_t nactive) { } else { stats_expect_empty(&psset->stats.full_slabs); } - size_t ninactive = PAGESLAB_PAGES - nactive; + size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; - if (ninactive != 0 && ninactive < PAGESLAB_PAGES) { + if (ninactive != 0 && ninactive < HUGEPAGE_PAGES) { nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( ninactive << LG_PAGE)); } @@ -342,14 +322,11 @@ stats_expect(psset_t *psset, size_t nactive) { TEST_BEGIN(test_stats) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); @@ -357,15 +334,15 @@ TEST_BEGIN(test_stats) { edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { stats_expect(&psset, i); edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } - stats_expect(&psset, PAGESLAB_PAGES); - edata_t *ps; - for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { + stats_expect(&psset, HUGEPAGE_PAGES); + hpdata_t *ps; + for (ssize_t i = HUGEPAGE_PAGES - 1; i >= 0; i--) { ps = psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), "psset_dalloc should only evict a slab on the last free"); @@ -384,37 +361,28 @@ TEST_END /* * Fills in and inserts two pageslabs, with the first better than the second, * and each fully allocated (into the allocations in allocs and worse_allocs, - * each of which should be PAGESLAB_PAGES long). + * each of which should be HUGEPAGE_PAGES long). * * (There's nothing magic about these numbers; it's just useful to share the * setup between the oldest fit and the insert/remove test). 
*/ static void -init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, - edata_t *alloc, edata_t *worse_alloc) { +init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, + hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; - memset(pageslab, 0, sizeof(*pageslab)); - edata_init(pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), - PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, - extent_state_active, /* zeroed */ false, /* comitted */ true, - EXTENT_PAI_HPA, EXTENT_IS_HEAD); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); /* - * This pageslab is better from an edata_comp_snad POV, but will be - * added to the set after the previous one, and so should be less - * preferred for allocations. + * This pageslab would be better from an address-first-fit POV, but + * better from an age POV. */ - memset(worse_pageslab, 0, sizeof(*worse_pageslab)); - edata_init(worse_pageslab, /* arena_ind */ 0, - (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, - SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, - /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); + hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); psset_init(psset); edata_init_test(&alloc[0]); psset_alloc_new(psset, pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); @@ -430,7 +398,7 @@ init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, * Make the two pssets otherwise indistinguishable; all full except for * a single page. */ - for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); @@ -439,17 +407,17 @@ init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, } /* Deallocate the last page from the older pageslab. */ - edata_t *evicted = psset_dalloc(psset, &alloc[PAGESLAB_PAGES - 1]); + hpdata_t *evicted = psset_dalloc(psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } TEST_BEGIN(test_oldest_fit) { bool err; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; + edata_t alloc[HUGEPAGE_PAGES]; + edata_t worse_alloc[HUGEPAGE_PAGES]; - edata_t pageslab; - edata_t worse_pageslab; + hpdata_t pageslab; + hpdata_t worse_pageslab; psset_t psset; @@ -468,12 +436,12 @@ TEST_END TEST_BEGIN(test_insert_remove) { bool err; - edata_t *ps; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; + hpdata_t *ps; + edata_t alloc[HUGEPAGE_PAGES]; + edata_t worse_alloc[HUGEPAGE_PAGES]; - edata_t pageslab; - edata_t worse_pageslab; + hpdata_t pageslab; + hpdata_t worse_pageslab; psset_t psset; @@ -482,31 +450,31 @@ TEST_BEGIN(test_insert_remove) { /* Remove better; should still be able to alloc from worse. 
*/ psset_remove(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &worse_alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, - edata_ps_get(&worse_alloc[PAGESLAB_PAGES - 1]), + edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), "Allocated out of wrong ps"); /* * After deallocating the previous alloc and reinserting better, it * should be preferred for future allocations. */ - ps = psset_dalloc(&psset, &worse_alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); psset_insert(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); - expect_ptr_eq(&pageslab, edata_ps_get(&alloc[PAGESLAB_PAGES - 1]), + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), "Removal/reinsertion shouldn't change ordering"); /* * After deallocating and removing both, allocations should fail. */ - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); psset_remove(&psset, &pageslab); psset_remove(&psset, &worse_pageslab); - err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } TEST_END -- cgit v0.12 From 089f8fa4429f5e9ee0e679411941ef180e446248 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 18 Nov 2020 14:52:19 -0800 Subject: Move hpdata bitmap logic out of the psset. --- include/jemalloc/internal/hpdata.h | 24 ++++++--- src/hpdata.c | 96 ++++++++++++++++++++++++++++++++++ src/psset.c | 104 +++++-------------------------------- 3 files changed, 127 insertions(+), 97 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index c4bf6ef..7bedaf4 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -107,13 +107,15 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { } static inline void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { - hpdata_addr_set(hpdata, addr); - hpdata_age_set(hpdata, age); - hpdata_huge_set(hpdata, false); - hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); - hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); - fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +hpdata_assert_empty(hpdata_t *hpdata) { + assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); + assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); +} + +static inline void +hpdata_assert_consistent(hpdata_t *hpdata) { + assert(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(hpdata)); } TYPED_LIST(hpdata_list, hpdata_t, ql_link) @@ -121,4 +123,12 @@ TYPED_LIST(hpdata_list, hpdata_t, ql_link) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); +/* + * Given an hpdata which can serve an allocation request, pick and reserve an + * offset within that allocation. 
+ */ +size_t hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages); +void hpdata_unreserve(hpdata_t *hpdata, size_t start, size_t npages); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpdata.c b/src/hpdata.c index bbe3acc..a876a30 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -16,3 +16,99 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { } ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) + + +void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + hpdata_addr_set(hpdata, addr); + hpdata_age_set(hpdata, age); + hpdata_huge_set(hpdata, false); + hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +} + +size_t +hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { + assert(npages <= hpdata_longest_free_range_get(hpdata)); + + size_t result; + + size_t start = 0; + /* + * These are dead stores, but the compiler will issue warnings on them + * since it can't tell statically that found is always true below. + */ + size_t begin = 0; + size_t len = 0; + + size_t largest_unchosen_range = 0; + while (true) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + /* + * A precondition to this function is that hpdata must be able + * to serve the allocation. + */ + assert(found); + if (len >= npages) { + /* + * We use first-fit within the page slabs; this gives + * bounded worst-case fragmentation within a slab. It's + * not necessarily right; we could experiment with + * various other options. + */ + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + /* We found a range; remember it. */ + result = begin; + fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); + hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) - npages); + + /* + * We might have shrunk the longest free range. We have to keep + * scanning until the end of the hpdata to be sure. + * + * TODO: As an optimization, we should only do this when the range we + * just allocated from was equal to the longest free range size. + */ + start = begin + npages; + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + if (!found) { + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + hpdata_longest_free_range_set(hpdata, largest_unchosen_range); + + return result; +} + +void +hpdata_unreserve(hpdata_t *hpdata, size_t begin, size_t npages) { + size_t old_longest_range = hpdata_longest_free_range_get(hpdata); + + fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); + /* We might have just created a new, larger range. 
*/ + size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, + begin) + 1); + size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES, + begin + npages - 1); + size_t new_range_len = new_end - new_begin; + + if (new_range_len > old_longest_range) { + hpdata_longest_free_range_set(hpdata, new_range_len); + } + + hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); +} diff --git a/src/psset.c b/src/psset.c index cebc1ce..5418851 100644 --- a/src/psset.c +++ b/src/psset.c @@ -105,15 +105,9 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } -JEMALLOC_ALWAYS_INLINE void -psset_assert_ps_consistent(hpdata_t *ps) { - assert(fb_urange_longest(ps->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(ps)); -} - void psset_insert(psset_t *psset, hpdata_t *ps) { - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -137,7 +131,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { void psset_remove(psset_t *psset, hpdata_t *ps) { - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -157,7 +151,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { void psset_hugify(psset_t *psset, hpdata_t *ps) { assert(!hpdata_huge_get(ps)); - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); psset_bin_stats_t *bin_stats; @@ -196,7 +190,7 @@ psset_recycle_extract(psset_t *psset, size_t size) { bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); return ps; } @@ -207,76 +201,18 @@ psset_recycle_extract(psset_t *psset, size_t size) { static void psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - size_t start = 0; - /* - * These are dead stores, but the compiler will issue warnings on them - * since it can't tell statically that found is always true below. - */ - size_t begin = 0; - size_t len = 0; - - fb_group_t *ps_fb = ps->active_pages; - - size_t npages = size >> LG_PAGE; - - size_t largest_unchosen_range = 0; - while (true) { - bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, - &begin, &len); - /* - * A precondition to this function is that ps must be able to - * serve the allocation. - */ - assert(found); - if (len >= npages) { - /* - * We use first-fit within the page slabs; this gives - * bounded worst-case fragmentation within a slab. It's - * not necessarily right; we could experiment with - * various other options. - */ - break; - } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; - } + size_t npages = size / PAGE; + size_t begin = hpdata_reserve_alloc(ps, npages); uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - fb_set_range(ps_fb, HUGEPAGE_PAGES, begin, npages); - hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) - npages)); /* The pageslab isn't in a bin, so no bin stats need to change. */ - /* - * OK, we've got to put the pageslab back. 
First we have to figure out - * where, though; we've only checked run sizes before the pageslab we - * picked. We also need to look for ones after the one we picked. Note - * that we want begin + npages as the start position, not begin + len; - * we might not have used the whole range. - * - * TODO: With a little bit more care, we can guarantee that the longest - * free range field in the edata is accurate upon entry, and avoid doing - * this check in the case where we're allocating from some smaller run. - */ - start = begin + npages; - while (start < HUGEPAGE_PAGES) { - bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, &begin, - &len); - if (!found) { - break; - } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; - } - hpdata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); - if (largest_unchosen_range == 0) { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + if (longest_free_range == 0) { psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { psset_insert(psset, ps); @@ -295,9 +231,7 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - fb_group_t *ps_fb = ps->active_pages; - assert(fb_empty(ps_fb, HUGEPAGE_PAGES)); - assert(hpdata_nfree_get(ps) == HUGEPAGE_PAGES); + hpdata_assert_empty(ps); psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -307,7 +241,6 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - fb_group_t *ps_fb = ps->active_pages; size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; if (ps_old_longest_free_range != 0) { @@ -319,33 +252,24 @@ psset_dalloc(psset_t *psset, edata_t *edata) { ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - fb_unset_range(ps_fb, HUGEPAGE_PAGES, begin, len); /* The pageslab is still in the bin; adjust its stats first. */ psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) + len)); + hpdata_unreserve(ps, begin, len); + size_t ps_new_longest_free_range = hpdata_longest_free_range_get(ps); - /* We might have just created a new, larger range. */ - size_t new_begin = (size_t)(fb_fls(ps_fb, HUGEPAGE_PAGES, begin) + 1); - size_t new_end = fb_ffs(ps_fb, HUGEPAGE_PAGES, begin + len - 1); - size_t new_range_len = new_end - new_begin; /* * If the new free range is no longer than the previous longest one, * then the pageslab is non-empty and doesn't need to change bins. * We're done, and don't need to return a pageslab to evict. */ - if (new_range_len <= ps_old_longest_free_range) { + if (ps_new_longest_free_range <= ps_old_longest_free_range) { return NULL; } /* - * Otherwise, it might need to get evicted from the set, or change its - * bin. - */ - hpdata_longest_free_range_set(ps, (uint32_t)new_range_len); - /* * If it was previously non-full, then it's in some (possibly now * incorrect) bin already; remove it. * @@ -371,12 +295,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. 
*/ - if (new_range_len == HUGEPAGE_PAGES) { + if (ps_new_longest_free_range == HUGEPAGE_PAGES) { return ps; } /* Otherwise, it gets reinserted. */ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( - new_range_len << LG_PAGE)); + ps_new_longest_free_range << LG_PAGE)); if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); -- cgit v0.12 From 5228d869ee9af9c547302abe3165bd63f6bdbbf5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 13:28:54 -0800 Subject: psset: Use fit/insert/remove as basis functions. All other functionality can be implemented in terms of these; doing so (while retaining the same API) will be convenient for subsequent refactors. --- include/jemalloc/internal/hpdata.h | 5 ++ include/jemalloc/internal/psset.h | 10 +-- src/hpa.c | 12 +-- src/psset.c | 160 +++++++------------------------------ 4 files changed, 43 insertions(+), 144 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7bedaf4..d221c57 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -123,6 +123,11 @@ TYPED_LIST(hpdata_list, hpdata_t, ql_link) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +static inline bool +hpdata_empty(hpdata_t *hpdata) { + return hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES; +} + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); /* * Given an hpdata which can serve an allocation request, pick and reserve an diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 01b4e80..c876c5c 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -64,13 +64,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); -void psset_hugify(psset_t *psset, hpdata_t *ps); - -/* - * Tries to obtain a chunk from an existing pageslab already in the set. - * Returns true on failure. - */ -bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); +/* Analogous to the eset_fit; pick a hpdata to serve the request. */ +hpdata_t *psset_fit(psset_t *psset, size_t size); /* * Given a newly created pageslab ps (not currently in the set), pass ownership @@ -79,6 +74,7 @@ bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); */ void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size); +bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); /* * Given an extent that comes from a pageslab in this pageslab set, returns it diff --git a/src/hpa.c b/src/hpa.c index 9a190c8..5614961 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -129,7 +129,7 @@ hpa_hugify(hpdata_t *ps) { assert(hpdata_huge_get(ps)); bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); /* - * Eat the error; even if the hugeification failed, it's still safe to + * Eat the error; even if the hugification failed, it's still safe to * pretend it didn't (and would require extraordinary measures to * unhugify). */ @@ -233,7 +233,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* * We do this unconditionally, even for pages which were not originally - * hugeified; it has the same effect. + * hugified; it has the same effect. 
*/ hpa_dehugify(ps); @@ -293,7 +293,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * Do the metadata modification while holding the lock; we'll * actually change state with the lock dropped. */ - psset_hugify(&shard->psset, ps); + psset_remove(&shard->psset, ps); + hpdata_huge_set(ps, true); + psset_insert(&shard->psset, ps); } malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { @@ -463,8 +465,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { emap_deregister_boundary(tsdn, shard->emap, edata); malloc_mutex_lock(tsdn, &shard->mtx); /* - * Note that the shard mutex protects the edata hugeified field, too. - * Page slabs can move between pssets (and have their hugeified status + * Note that the shard mutex protects the edata hugified field, too. + * Page slabs can move between pssets (and have their hugified status * change) in racy ways. */ hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); diff --git a/src/psset.c b/src/psset.c index 5418851..2e3558c 100644 --- a/src/psset.c +++ b/src/psset.c @@ -76,23 +76,6 @@ psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); } -/* - * We don't currently need an "activate" equivalent to this, since down the - * allocation pathways we don't do the optimization in which we change a slab - * without first removing it from a bin. - */ -static void -psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { - size_t *nactive_dst = huge - ? &binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = huge - ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - - assert(*nactive_dst >= num); - *nactive_dst -= num; - *ninactive_dst += num; -} - static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); @@ -148,32 +131,8 @@ psset_remove(psset_t *psset, hpdata_t *ps) { } } -void -psset_hugify(psset_t *psset, hpdata_t *ps) { - assert(!hpdata_huge_get(ps)); - hpdata_assert_consistent(ps); - - size_t longest_free_range = hpdata_longest_free_range_get(ps); - psset_bin_stats_t *bin_stats; - if (longest_free_range == 0) { - bin_stats = &psset->stats.full_slabs; - } else { - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - bin_stats = &psset->stats.nonfull_slabs[pind]; - } - psset_bin_stats_remove(bin_stats, ps); - hpdata_huge_set(ps, true); - psset_bin_stats_insert(bin_stats, ps); -} - -/* - * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the - * set, picks one that can satisfy the allocation and remove it from the set. - */ -static hpdata_t * -psset_recycle_extract(psset_t *psset, size_t size) { +hpdata_t * +psset_fit(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); @@ -185,22 +144,14 @@ psset_recycle_extract(psset_t *psset, size_t size) { return NULL; } - psset_hpdata_heap_remove(psset, pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, pind); - } - hpdata_assert_consistent(ps); + return ps; } -/* - * Given a pageslab ps and an edata to allocate size bytes from, initializes the - * edata with a range in the pageslab, and puts ps back in the set. 
- */ -static void -psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, - size_t size) { +void +psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { + hpdata_assert_empty(ps); size_t npages = size / PAGE; size_t begin = hpdata_reserve_alloc(ps, npages); uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; @@ -209,30 +160,28 @@ psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - /* The pageslab isn't in a bin, so no bin stats need to change. */ - - size_t longest_free_range = hpdata_longest_free_range_get(ps); - if (longest_free_range == 0) { - psset_bin_stats_insert(&psset->stats.full_slabs, ps); - } else { - psset_insert(psset, ps); - } + psset_insert(psset, ps); } bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_recycle_extract(psset, size); - if (ps == NULL) { - return true; - } - psset_ps_alloc_insert(psset, ps, r_edata, size); - return false; -} + hpdata_t *ps = psset_fit(psset, size); + if (ps == NULL) { + return true; + } + psset_remove(psset, ps); -void -psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - hpdata_assert_empty(ps); - psset_ps_alloc_insert(psset, ps, r_edata, size); + size_t npages = size / PAGE; + size_t begin = hpdata_reserve_alloc(ps, npages); + uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; + edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); + + return false; } hpdata_t * @@ -241,70 +190,17 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); - pszind_t old_pind = SC_NPSIZES; - if (ps_old_longest_free_range != 0) { - old_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_old_longest_free_range << LG_PAGE)); - } - size_t begin = ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - /* The pageslab is still in the bin; adjust its stats first. */ - psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 - ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); - psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - + psset_remove(psset, ps); hpdata_unreserve(ps, begin, len); - size_t ps_new_longest_free_range = hpdata_longest_free_range_get(ps); - - /* - * If the new free range is no longer than the previous longest one, - * then the pageslab is non-empty and doesn't need to change bins. - * We're done, and don't need to return a pageslab to evict. - */ - if (ps_new_longest_free_range <= ps_old_longest_free_range) { - return NULL; - } - /* - * If it was previously non-full, then it's in some (possibly now - * incorrect) bin already; remove it. - * - * TODO: We bailed out early above if we didn't expand the longest free - * range, which should avoid a lot of redundant remove/reinserts in the - * same bin. But it doesn't eliminate all of them; it's possible that - * we decreased the longest free range length, but only slightly, and - * not enough to change our pszind. We could check that more precisely. 
- * (Or, ideally, size class dequantization will happen at some point, - * and the issue becomes moot). - */ - if (ps_old_longest_free_range > 0) { - psset_hpdata_heap_remove(psset, old_pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[old_pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, - (size_t)old_pind); - } - } else { - /* - * Otherwise, the bin was full, and we need to adjust the full - * bin stats. - */ - psset_bin_stats_remove(&psset->stats.full_slabs, ps); - } - /* If the pageslab is empty, it gets evicted from the set. */ - if (ps_new_longest_free_range == HUGEPAGE_PAGES) { + if (hpdata_empty(ps)) { return ps; + } else { + psset_insert(psset, ps); + return NULL; } - /* Otherwise, it gets reinserted. */ - pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_new_longest_free_range << LG_PAGE)); - if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, - (size_t)new_pind); - } - psset_hpdata_heap_insert(psset, new_pind, ps); - return NULL; } -- cgit v0.12 From 0971e1e4e33edf1cd0d5be808d1eb092ffeab9f3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 14:34:27 -0800 Subject: hpdata: Use addr/size instead of begin/npages. This is easier for the users of the hpdata. --- include/jemalloc/internal/hpdata.h | 4 ++-- src/hpdata.c | 16 ++++++++++++---- src/psset.c | 20 ++++++-------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index d221c57..cb034ea 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -133,7 +133,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * Given an hpdata which can serve an allocation request, pick and reserve an * offset within that allocation. 
*/ -size_t hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages); -void hpdata_unreserve(hpdata_t *hpdata, size_t start, size_t npages); +void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpdata.c b/src/hpdata.c index a876a30..847eb9d 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -28,8 +28,10 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { fb_init(hpdata->active_pages, HUGEPAGE_PAGES); } -size_t -hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { +void * +hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { + assert((sz & PAGE_MASK) == 0); + size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); size_t result; @@ -91,11 +93,17 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { } hpdata_longest_free_range_set(hpdata, largest_unchosen_range); - return result; + return (void *)( + (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void -hpdata_unreserve(hpdata_t *hpdata, size_t begin, size_t npages) { +hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { + assert((sz & PAGE_MASK) == 0); + size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) + >> LG_PAGE; + assert(begin < HUGEPAGE_PAGES); + size_t npages = sz >> LG_PAGE; size_t old_longest_range = hpdata_longest_free_range_get(hpdata); fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); diff --git a/src/psset.c b/src/psset.c index 2e3558c..c31520f 100644 --- a/src/psset.c +++ b/src/psset.c @@ -152,10 +152,8 @@ psset_fit(psset_t *psset, size_t size) { void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); - size_t npages = size / PAGE; - size_t begin = hpdata_reserve_alloc(ps, npages); - uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; - edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); @@ -171,10 +169,9 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { } psset_remove(psset, ps); - size_t npages = size / PAGE; - size_t begin = hpdata_reserve_alloc(ps, npages); - uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; - edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); @@ -190,13 +187,8 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - size_t begin = - ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) - >> LG_PAGE; - size_t len = edata_size_get(edata) >> LG_PAGE; - psset_remove(psset, ps); - hpdata_unreserve(ps, begin, len); + hpdata_unreserve(ps, edata_base_get(edata), edata_size_get(edata)); if (hpdata_empty(ps)) { return ps; } else { -- cgit v0.12 From f9299ca572e976597987a1786ac3c5a173a3dbce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 15:15:21 -0800 Subject: HPA: Use psset fit/insert/remove. 
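With the psset interface reduced to fit/insert/remove, every allocation and
deallocation path becomes a short composition of those three calls with the
hpdata reservation API.  A condensed sketch of that composition (illustrative
only: the helper names below are not part of the diffs, and locking, stats
accounting, and emap registration are omitted):

static bool
sketch_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) {
	hpdata_t *ps = psset_fit(psset, size);
	if (ps == NULL) {
		return true;
	}
	psset_remove(psset, ps);
	void *addr = hpdata_reserve_alloc(ps, size);
	edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size,
	    /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active,
	    /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
	    EXTENT_NOT_HEAD);
	edata_ps_set(r_edata, ps);
	psset_insert(psset, ps);
	return false;
}

static hpdata_t *
sketch_dalloc(psset_t *psset, edata_t *edata) {
	hpdata_t *ps = edata_ps_get(edata);
	psset_remove(psset, ps);
	hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata));
	if (hpdata_empty(ps)) {
		/* Empty slab; the caller decides whether to evict it. */
		return ps;
	}
	psset_insert(psset, ps);
	return NULL;
}

The hpa.c changes below follow this same shape.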
This will let us remove alloc_new and alloc_reuse functions from the psset. --- src/hpa.c | 86 +++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 5614961..79f97dc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -255,12 +255,20 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } assert(edata_arena_ind_get(edata) == shard->ind); - err = psset_alloc_reuse(&shard->psset, edata, size); - if (err) { + hpdata_t *ps = psset_fit(&shard->psset, size); + if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } + + psset_remove(&shard->psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(edata, shard->ind, addr, size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, + /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + edata_ps_set(edata, ps); + /* * This could theoretically be moved outside of the critical section, * but that introduces the potential for a race. Without the lock, the @@ -272,31 +280,21 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_t *ps = psset_dalloc(&shard->psset, edata); - /* - * The pageslab was nonempty before we started; it - * should still be nonempty now, and so shouldn't get - * evicted. - */ - assert(ps == NULL); + hpdata_unreserve(ps, edata_addr_get(edata), + edata_size_get(edata)); + psset_insert(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } - hpdata_t *ps = edata_ps_get(edata); - assert(ps != NULL); bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - /* - * Do the metadata modification while holding the lock; we'll - * actually change state with the lock dropped. - */ - psset_remove(&shard->psset, ps); hpdata_huge_set(ps, true); - psset_insert(&shard->psset, ps); } + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { /* @@ -345,8 +343,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *grow_ps = hpa_grow(tsdn, shard); - if (grow_ps == NULL) { + hpdata_t *ps = hpa_grow(tsdn, shard); + if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } @@ -357,19 +355,21 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (edata == NULL) { malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_handle_ps_eviction(tsdn, shard, grow_ps); + hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } - psset_alloc_new(&shard->psset, grow_ps, edata, size); + + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(edata, shard->ind, addr, size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, + /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + edata_ps_set(edata, ps); + err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_t *ps = psset_dalloc(&shard->psset, edata); - /* - * The pageslab was empty except for the new allocation; it - * should get evicted. 
- */ - assert(ps == grow_ps); + hpdata_unreserve(ps, edata_addr_get(edata), + edata_size_get(edata)); edata_cache_small_put(tsdn, &shard->ecs, edata); /* * Technically the same as fallthrough at the time of this @@ -381,6 +381,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; @@ -464,21 +466,18 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); malloc_mutex_lock(tsdn, &shard->mtx); - /* - * Note that the shard mutex protects the edata hugified field, too. - * Page slabs can move between pssets (and have their hugified status - * change) in racy ways. - */ - hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); - /* - * If a pageslab became empty because of the dalloc, it better have been - * the one we expected. - */ - assert(evicted_ps == NULL || evicted_ps == ps); + + /* Note that the shard mutex protects ps's metadata too. */ + psset_remove(&shard->psset, ps); + hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (evicted_ps != NULL) { - hpa_handle_ps_eviction(tsdn, shard, evicted_ps); + if (hpdata_empty(ps)) { + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_handle_ps_eviction(tsdn, shard, ps); + } else { + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -501,10 +500,9 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { - edata_t edata = {0}; malloc_mutex_assert_owner(tsdn, &shard->mtx); - bool psset_empty = psset_alloc_reuse(psset, &edata, PAGE); - assert(psset_empty); + hpdata_t *ps = psset_fit(psset, PAGE); + assert(ps == NULL); hpa_shard_assert_stats_empty(&psset->stats.full_slabs); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { hpa_shard_assert_stats_empty( -- cgit v0.12 From f7cf23aa4d7c266af512c599205b1fab80b26796 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 16:10:56 -0800 Subject: psset: Relegate alloc/dalloc to test code. This is no longer part of the "core" functionality; we only need the stub implementations as an end-to-end test of hpdata + psset interactions when metadata is being modified. Treat them accordingly. --- include/jemalloc/internal/psset.h | 19 ------ src/psset.c | 49 +------------ test/unit/psset.c | 140 +++++++++++++++++++++++++------------- 3 files changed, 94 insertions(+), 114 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index c876c5c..7027cff 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -67,23 +67,4 @@ void psset_remove(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_fit(psset_t *psset, size_t size); -/* - * Given a newly created pageslab ps (not currently in the set), pass ownership - * to the psset and allocate an extent from within it. The passed-in pageslab - * must be at least as big as size. 
- */ -void psset_alloc_new(psset_t *psset, hpdata_t *ps, - edata_t *r_edata, size_t size); -bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); - -/* - * Given an extent that comes from a pageslab in this pageslab set, returns it - * to its slab. Does not take ownership of the underlying edata_t. - * - * If some slab becomes empty as a result of the dalloc, it is retuend -- the - * result must be checked and deallocated to the central HPA. Otherwise returns - * NULL. - */ -hpdata_t *psset_dalloc(psset_t *psset, edata_t *edata); - #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/psset.c b/src/psset.c index c31520f..7a5bd60 100644 --- a/src/psset.c +++ b/src/psset.c @@ -90,6 +90,7 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { void psset_insert(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -148,51 +149,3 @@ psset_fit(psset_t *psset, size_t size) { return ps; } - -void -psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - hpdata_assert_empty(ps); - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); - psset_insert(psset, ps); -} - -bool -psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_fit(psset, size); - if (ps == NULL) { - return true; - } - psset_remove(psset, ps); - - - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); - psset_insert(psset, ps); - - return false; -} - -hpdata_t * -psset_dalloc(psset_t *psset, edata_t *edata) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_ps_get(edata) != NULL); - hpdata_t *ps = edata_ps_get(edata); - - psset_remove(psset, ps); - hpdata_unreserve(ps, edata_base_get(edata), edata_size_get(edata)); - if (hpdata_empty(ps)) { - return ps; - } else { - psset_insert(psset, ps); - return NULL; - } -} diff --git a/test/unit/psset.c b/test/unit/psset.c index 811c7be..4147729 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -16,6 +16,49 @@ edata_init_test(edata_t *edata) { } static void +test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, + size_t size) { + hpdata_assert_empty(ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); +} + +static bool +test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { + hpdata_t *ps = psset_fit(psset, size); + if (ps == NULL) { + return true; + } + psset_remove(psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); + return false; +} + +static hpdata_t * 
+test_psset_dalloc(psset_t *psset, edata_t *edata) { + hpdata_t *ps = edata_ps_get(edata); + psset_remove(psset, ps); + hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + if (hpdata_empty(ps)) { + return ps; + } else { + psset_insert(psset, ps); + return NULL; + } +} + +static void edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { /* * Note that allocations should get the arena ind of their home @@ -50,7 +93,7 @@ TEST_BEGIN(test_empty) { psset_init(&psset); /* Empty psset should return fail allocations. */ - err = psset_alloc_reuse(&psset, &alloc, PAGE); + err = test_psset_alloc_reuse(&psset, &alloc, PAGE); expect_true(err, "Empty psset succeeded in an allocation."); } TEST_END @@ -67,10 +110,10 @@ TEST_BEGIN(test_fill) { psset_init(&psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } @@ -82,7 +125,7 @@ TEST_BEGIN(test_fill) { /* The pageslab, and thus psset, should now have no allocations. */ edata_t extra_alloc; edata_init_test(&extra_alloc); - err = psset_alloc_reuse(&psset, &extra_alloc, PAGE); + err = test_psset_alloc_reuse(&psset, &extra_alloc, PAGE); expect_true(err, "Alloc succeeded even though psset should be empty"); } TEST_END @@ -100,10 +143,10 @@ TEST_BEGIN(test_reuse) { psset_init(&psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } @@ -112,7 +155,7 @@ TEST_BEGIN(test_reuse) { if (i % 2 == 0) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* Realloc into them. */ @@ -120,7 +163,7 @@ TEST_BEGIN(test_reuse) { if (i % 2 == 0) { continue; } - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); edata_expect(&alloc[i], i, 1); } @@ -129,7 +172,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 > 1) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* And realloc 2-page allocations into them. */ @@ -137,7 +180,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 != 0) { continue; } - err = psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); expect_false(err, "Nonempty psset failed page allocation."); edata_expect(&alloc[i], i, 2); } @@ -146,7 +189,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 != 0) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* @@ -155,23 +198,23 @@ TEST_BEGIN(test_reuse) { * (since 12 % 4 == 0). 
*/ size_t index_of_3 = 11; - ps = psset_dalloc(&psset, &alloc[index_of_3]); + ps = test_psset_dalloc(&psset, &alloc[index_of_3]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - err = psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_3], index_of_3, 3); /* Free up a 4-page hole at the end. */ - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); expect_ptr_null(ps, "Nonempty pageslab evicted"); /* Make sure we can satisfy an allocation at the very end of a slab. */ size_t index_of_4 = HUGEPAGE_PAGES - 4; - ps = psset_dalloc(&psset, &alloc[index_of_4]); + ps = test_psset_dalloc(&psset, &alloc[index_of_4]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); } @@ -191,22 +234,22 @@ TEST_BEGIN(test_evict) { /* Alloc the whole slab. */ edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Unxpected allocation failure"); } /* Dealloc the whole slab, going forwards. */ for (size_t i = 0; i < HUGEPAGE_PAGES - 1; i++) { - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); - err = psset_alloc_reuse(&psset, &alloc[0], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[0], PAGE); expect_true(err, "psset should be empty."); } TEST_END @@ -228,15 +271,15 @@ TEST_BEGIN(test_multi_pageslab) { /* Insert both slabs. */ edata_init_test(&alloc[0][0]); - psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); + test_psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); edata_init_test(&alloc[1][0]); - psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); + test_psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); /* Fill them both up; make sure we do so in first-fit order. */ for (size_t i = 0; i < 2; i++) { for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); - err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i][j], PAGE); expect_false(err, "Nonempty psset failed page allocation."); assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), @@ -248,13 +291,13 @@ TEST_BEGIN(test_multi_pageslab) { * Free up a 2-page hole in the earlier slab, and a 1-page one in the * later one. We should still pick the later one. 
*/ - ps = psset_dalloc(&psset, &alloc[0][0]); + ps = test_psset_dalloc(&psset, &alloc[0][0]); expect_ptr_null(ps, "Unexpected eviction"); - ps = psset_dalloc(&psset, &alloc[0][1]); + ps = test_psset_dalloc(&psset, &alloc[0][1]); expect_ptr_null(ps, "Unexpected eviction"); - ps = psset_dalloc(&psset, &alloc[1][0]); + ps = test_psset_dalloc(&psset, &alloc[1][0]); expect_ptr_null(ps, "Unexpected eviction"); - err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[0][0], PAGE); expect_ptr_eq(&pageslab[1], edata_ps_get(&alloc[0][0]), "Should have picked the fuller pageslab"); @@ -262,14 +305,14 @@ TEST_BEGIN(test_multi_pageslab) { * Now both slabs have 1-page holes. Free up a second one in the later * slab. */ - ps = psset_dalloc(&psset, &alloc[1][1]); + ps = test_psset_dalloc(&psset, &alloc[1][1]); expect_ptr_null(ps, "Unexpected eviction"); /* * We should be able to allocate a 2-page object, even though an earlier * size class is nonempty. */ - err = psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); expect_false(err, "Allocation should have succeeded"); } TEST_END @@ -333,23 +376,24 @@ TEST_BEGIN(test_stats) { stats_expect(&psset, 0); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { stats_expect(&psset, i); edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } stats_expect(&psset, HUGEPAGE_PAGES); hpdata_t *ps; for (ssize_t i = HUGEPAGE_PAGES - 1; i >= 0; i--) { - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), - "psset_dalloc should only evict a slab on the last free"); + "test_psset_dalloc should only evict a slab on the last " + "free"); stats_expect(&psset, i); } - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); stats_expect(&psset, 1); psset_remove(&psset, &pageslab); stats_expect(&psset, 0); @@ -381,17 +425,17 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, psset_init(psset); edata_init_test(&alloc[0]); - psset_alloc_new(psset, pageslab, &alloc[0], PAGE); + test_psset_alloc_new(psset, pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); expect_ptr_eq(pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } edata_init_test(&worse_alloc[0]); - psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); + test_psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); expect_ptr_eq(worse_pageslab, edata_ps_get(&worse_alloc[0]), "Allocated from the wrong pageslab"); /* @@ -400,14 +444,15 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, */ for (size_t i = 1; i < HUGEPAGE_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); - err = psset_alloc_reuse(psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); expect_ptr_eq(worse_pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } /* Deallocate the last page 
from the older pageslab. */ - hpdata_t *evicted = psset_dalloc(psset, &alloc[HUGEPAGE_PAGES - 1]); + hpdata_t *evicted = test_psset_dalloc(psset, + &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } @@ -427,7 +472,7 @@ TEST_BEGIN(test_oldest_fit) { /* The edata should come from the better pageslab. */ edata_t test_edata; edata_init_test(&test_edata); - err = psset_alloc_reuse(&psset, &test_edata, PAGE); + err = test_psset_alloc_reuse(&psset, &test_edata, PAGE); expect_false(err, "Nonempty psset failed page allocation"); expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), "Allocated from the wrong pageslab"); @@ -450,7 +495,8 @@ TEST_BEGIN(test_insert_remove) { /* Remove better; should still be able to alloc from worse. */ psset_remove(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], + PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), @@ -460,21 +506,21 @@ TEST_BEGIN(test_insert_remove) { * After deallocating the previous alloc and reinserting better, it * should be preferred for future allocations. */ - ps = psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); psset_insert(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), "Removal/reinsertion shouldn't change ordering"); /* * After deallocating and removing both, allocations should fail. */ - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); psset_remove(&psset, &pageslab); psset_remove(&psset, &worse_pageslab); - err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } TEST_END -- cgit v0.12 From fffcefed338429b43ad29a185067f976fe564d11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 17:25:54 -0800 Subject: malloc_conf: Clarify HPA options. --- src/jemalloc.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 277b9e7..30c2fe1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -137,8 +137,8 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_slab_max_alloc = 256 * 1024; +size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_small_max = 32 * 1024; size_t opt_hpa_large_min = 4 * 1024 * 1024; @@ -1495,20 +1495,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - /* - * If someone violates these mins and maxes, they're - * confused. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", - PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, - true) CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1517,6 +1506,21 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + /* + * These no longer have any effect, but various + * non-public test configs set them as we iterate on HPA + * development. We parse and report them for now, but + * they don't affect behavior. Eventually they'll be + * removed. + */ + CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", + PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, + true) + CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); -- cgit v0.12 From 3ed0b4e8a3f53c099ba6b2989b1e38878b40ef9b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 19:06:50 -0800 Subject: HPA: Add an nevictions counter. I.e. the number of times we've purged a hugepage-sized region. --- include/jemalloc/internal/hpa.h | 12 +++++++++++- src/ctl.c | 7 ++++++- src/hpa.c | 14 +++++++++----- src/stats.c | 16 +++++++++++++--- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index edb3617..217604e 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,10 +6,12 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -/* Used only by CTL; not actually stored here (i.e., all derived). */ +/* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; + /* The stat version of the nevictions counter. */ + uint64_t nevictions; }; typedef struct hpa_shard_s hpa_shard_t; @@ -69,6 +71,14 @@ struct hpa_shard_s { /* The arena ind we're associated with. */ unsigned ind; emap_t *emap; + + /* + * The number of times we've purged a hugepage. Each eviction purges a + * single hugepage. + * + * Guarded by the grow mutex. 
+ */ + uint64_t nevictions; }; /* diff --git a/src/ctl.c b/src/ctl.c index 88cee66..4266e4b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -220,6 +220,7 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) +CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) @@ -655,7 +656,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)} + stats_arenas_i_hpa_shard_nonfull_slabs)}, + {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -3372,6 +3374,9 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, + arenas_i(mib[2])->astats->hpastats.nevictions, uint64_t); + /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index 79f97dc..a51f83c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,6 +74,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->nevictions = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -97,14 +98,18 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); + dst->nevictions += src->nevictions; } void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); + dst->nevictions += shard->nevictions; malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); } static hpdata_t * @@ -238,6 +243,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { hpa_dehugify(ps); malloc_mutex_lock(tsdn, &shard->grow_mtx); + shard->nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -353,6 +359,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_lock(tsdn, &shard->mtx); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { + shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); @@ -371,11 +378,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); edata_cache_small_put(tsdn, &shard->ecs, edata); - /* - * Technically the same as fallthrough at the time of this - * writing, but consistent with the error handling in the rest - * of the function. 
- */ + + shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); diff --git a/src/stats.c b/src/stats.c index abe3ab1..aab9fb5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -661,12 +661,14 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t header_row; emitter_row_init(&header_row); emitter_row_t row; emitter_row_init(&row); + uint64_t nevictions; + size_t npageslabs_huge; size_t nactive_huge; size_t ninactive_huge; @@ -675,6 +677,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { size_t nactive_nonhuge; size_t ninactive_nonhuge; + CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", + i, &nevictions, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -696,13 +701,18 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_table_printf(emitter, "HPA shard stats:\n" + " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ninactive: %zu huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + nevictions, rate_per_second(nevictions, uptime), + npageslabs_huge, npageslabs_nonhuge, + nactive_huge, nactive_nonhuge, ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_kv(emitter, "nevictions", emitter_type_uint64, + &nevictions); emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); @@ -1137,7 +1147,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, stats_arena_extents_print(emitter, i); } if (hpa) { - stats_arena_hpa_shard_print(emitter, i); + stats_arena_hpa_shard_print(emitter, i, uptime); } } -- cgit v0.12 From d9f7e6c66899b29976cd6ec828ee0f14d4db3aac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 07:04:01 -0800 Subject: hpdata: Add a test. We're about to make the functionality here more complicated; testing hpdata directly (rather than relying on user's tests) will make debugging easier. 
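
For readers skimming the patch, the property the new test re-checks at every step is that hpdata's cached longest-free-range metadata tracks each reserve/unreserve call, with reservations placed first-fit. A minimal sketch of that usage pattern, written against the same test harness the new test uses (the hpdata_* names come from the internal header shown in the diff; the helper name and constants here are only illustrative):

#include "test/jemalloc_test.h"

/* Illustrative only: the invariant the new test exercises in depth. */
static void
hpdata_smoke_check(void) {
        hpdata_t hpdata;
        hpdata_init(&hpdata, (void *)(10 * HUGEPAGE), /* age */ 123);

        /* A freshly initialized hugepage is entirely free. */
        expect_zu_eq(HUGEPAGE_PAGES,
            hpdata_longest_free_range_get(&hpdata), "");

        /* Reservations are first-fit, so this comes from the start. */
        void *p = hpdata_reserve_alloc(&hpdata, 2 * PAGE);
        expect_ptr_eq(hpdata_addr_get(&hpdata), p, "");
        expect_zu_eq(HUGEPAGE_PAGES - 2,
            hpdata_longest_free_range_get(&hpdata), "");

        /* Unreserving the same range restores the full free run. */
        hpdata_unreserve(&hpdata, p, 2 * PAGE);
        expect_zu_eq(HUGEPAGE_PAGES,
            hpdata_longest_free_range_get(&hpdata), "");
}

The test added below does the same thing exhaustively, one page at a time, and also covers coalescing of adjacent free ranges on unreserve.
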
--- Makefile.in | 1 + include/jemalloc/internal/hpdata.h | 9 ++++-- test/unit/hpdata.c | 61 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 test/unit/hpdata.c diff --git a/Makefile.in b/Makefile.in index f263fc3..ba6dd76 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,6 +219,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_central.c \ + $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index cb034ea..e8433c5 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -112,10 +112,15 @@ hpdata_assert_empty(hpdata_t *hpdata) { assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); } +static inline bool +hpdata_consistent(hpdata_t *hpdata) { + return fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(hpdata); +} + static inline void hpdata_assert_consistent(hpdata_t *hpdata) { - assert(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(hpdata)); + assert(hpdata_consistent(hpdata)); } TYPED_LIST(hpdata_list, hpdata_t, ql_link) diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c new file mode 100644 index 0000000..1bf58bc --- /dev/null +++ b/test/unit/hpdata.c @@ -0,0 +1,61 @@ +#include "test/jemalloc_test.h" + +#define HPDATA_ADDR ((void *)(10 * HUGEPAGE)) +#define HPDATA_AGE 123 + +TEST_BEGIN(test_reserve_alloc) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocating a page at a time, we should do first fit. */ + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES - i, + hpdata_longest_free_range_get(&hpdata), ""); + void *alloc = hpdata_reserve_alloc(&hpdata, PAGE); + expect_ptr_eq((char *)HPDATA_ADDR + i * PAGE, alloc, ""); + expect_true(hpdata_consistent(&hpdata), ""); + } + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(0, hpdata_longest_free_range_get(&hpdata), ""); + + /* + * Build up a bigger free-range, 2 pages at a time, until we've got 6 + * adjacent free pages total. Pages 8-13 should be unreserved after + * this. + */ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 10 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(2, hpdata_longest_free_range_get(&hpdata), ""); + + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 12 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(4, hpdata_longest_free_range_get(&hpdata), ""); + + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 8 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(6, hpdata_longest_free_range_get(&hpdata), ""); + + /* + * Leave page 14 reserved, but free page 15 (this test the case where + * unreserving combines two ranges). + */ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 15 * PAGE, PAGE); + /* + * Longest free range shouldn't change; we've got a free range of size + * 6, then a reserved page, then another free range. + */ + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(6, hpdata_longest_free_range_get(&hpdata), ""); + + /* After freeing page 14, the two ranges get combined. 
*/ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 14 * PAGE, PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(8, hpdata_longest_free_range_get(&hpdata), ""); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_reserve_alloc); +} -- cgit v0.12 From 734e72ce8fb897bdbcbd48bb994c3778dba50dc6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 10:04:32 -0800 Subject: bit_util: Guarantee popcount's presence. Implement popcount generically, so that we can rely on it being present. --- configure.ac | 1 + include/jemalloc/internal/bit_util.h | 89 +++++++++++++++++++++++++++++++++++- test/unit/bit_util.c | 79 +++++++++++++++++++++++++++++++- 3 files changed, 166 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 8e21f3f..8284e87 100644 --- a/configure.ac +++ b/configure.ac @@ -1594,6 +1594,7 @@ JE_COMPILABLE([a program using __builtin_popcountl], [ if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount]) AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll]) fi AC_ARG_WITH([lg_quantum], diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c5158f6..bac5914 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -179,12 +179,97 @@ fls_u(unsigned x) { } #endif -#ifdef JEMALLOC_INTERNAL_POPCOUNTL +#if LG_SIZEOF_LONG_LONG > 3 +# error "Haven't implemented popcount for 16-byte ints." +#endif + +#define DO_POPCOUNT(x, type) do { \ + /* \ + * Algorithm from an old AMD optimization reference manual. \ + * We're putting a little bit more work than you might expect \ + * into the no-instrinsic case, since we only support the \ + * GCC intrinsics spelling of popcount (for now). Detecting \ + * whether or not the popcount builtin is actually useable in \ + * MSVC is nontrivial. \ + */ \ + \ + type bmul = (type)0x0101010101010101ULL; \ + \ + /* \ + * Replace each 2 bits with the sideways sum of the original \ + * values. 0x5 = 0b0101. \ + * \ + * You might expect this to be: \ + * x = (x & 0x55...) + ((x >> 1) & 0x55...). \ + * That costs an extra mask relative to this, though. \ + */ \ + x = x - ((x >> 1) & (0x55U * bmul)); \ + /* Replace each 4 bits with their sideays sum. 0x3 = 0b0011. */\ + x = (x & (bmul * 0x33U)) + ((x >> 2) & (bmul * 0x33U)); \ + /* \ + * Replace each 8 bits with their sideways sum. Note that we \ + * can't overflow within each 4-bit sum here, so we can skip \ + * the initial mask. \ + */ \ + x = (x + (x >> 4)) & (bmul * 0x0FU); \ + /* \ + * None of the partial sums in this multiplication (viewed in \ + * base-256) can overflow into the next digit. So the least \ + * significant byte of the product will be the least \ + * significant byte of the original value, the second least \ + * significant byte will be the sum of the two least \ + * significant bytes of the original value, and so on. \ + * Importantly, the high byte will be the byte-wise sum of all \ + * the bytes of the original value. 
\ + */ \ + x = x * bmul; \ + x >>= ((sizeof(x) - 1) * 8); \ + return (unsigned)x; \ +} while(0) + +static inline unsigned +popcount_u_slow(unsigned bitmap) { + DO_POPCOUNT(bitmap, unsigned); +} + +static inline unsigned +popcount_lu_slow(unsigned long bitmap) { + DO_POPCOUNT(bitmap, unsigned long); +} + +static inline unsigned +popcount_llu_slow(unsigned long long bitmap) { + DO_POPCOUNT(bitmap, unsigned long long); +} + +#undef DO_POPCOUNT + +static inline unsigned +popcount_u(unsigned bitmap) { +#ifdef JEMALLOC_INTERNAL_POPCOUNT + return JEMALLOC_INTERNAL_POPCOUNT(bitmap); +#else + return popcount_u_slow(bitmap); +#endif +} + static inline unsigned popcount_lu(unsigned long bitmap) { - return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); +#ifdef JEMALLOC_INTERNAL_POPCOUNTL + return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); +#else + return popcount_lu_slow(bitmap); +#endif } + +static inline unsigned +popcount_llu(unsigned long long bitmap) { +#ifdef JEMALLOC_INTERNAL_POPCOUNTLL + return JEMALLOC_INTERNAL_POPCOUNTLL(bitmap); +#else + return popcount_llu_slow(bitmap); #endif +} /* * Clears first unset bit in bitmap, and returns diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 045cf8b..7d31b21 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -204,6 +204,77 @@ TEST_BEGIN(test_fls_llu_slow) { } TEST_END +static unsigned +popcount_byte(unsigned byte) { + int count = 0; + for (int i = 0; i < 8; i++) { + if ((byte & (1 << i)) != 0) { + count++; + } + } + return count; +} + +static uint64_t +expand_byte_to_mask(unsigned byte) { + uint64_t result = 0; + for (int i = 0; i < 8; i++) { + if ((byte & (1 << i)) != 0) { + result |= ((uint64_t)0xFF << (i * 8)); + } + } + return result; +} + +#define TEST_POPCOUNT(t, suf, pri_hex) do { \ + t bmul = (t)0x0101010101010101ULL; \ + for (unsigned i = 0; i < (1 << sizeof(t)); i++) { \ + for (unsigned j = 0; j < 256; j++) { \ + /* \ + * Replicate the byte j into various \ + * bytes of the integer (as indicated by the \ + * mask in i), and ensure that the popcount of \ + * the result is popcount(i) * popcount(j) \ + */ \ + t mask = (t)expand_byte_to_mask(i); \ + t x = (bmul * j) & mask; \ + expect_u_eq( \ + popcount_byte(i) * popcount_byte(j), \ + popcount_##suf(x), \ + "Unexpected result, x=0x%"pri_hex, x); \ + } \ + } \ +} while (0) + +TEST_BEGIN(test_popcount_u) { + TEST_POPCOUNT(unsigned, u, "x"); +} +TEST_END + +TEST_BEGIN(test_popcount_u_slow) { + TEST_POPCOUNT(unsigned, u_slow, "x"); +} +TEST_END + +TEST_BEGIN(test_popcount_lu) { + TEST_POPCOUNT(unsigned long, lu, "lx"); +} +TEST_END + +TEST_BEGIN(test_popcount_lu_slow) { + TEST_POPCOUNT(unsigned long, lu_slow, "lx"); +} +TEST_END + +TEST_BEGIN(test_popcount_llu) { + TEST_POPCOUNT(unsigned long long, llu, "llx"); +} +TEST_END + +TEST_BEGIN(test_popcount_llu_slow) { + TEST_POPCOUNT(unsigned long long, llu_slow, "llx"); +} +TEST_END int main(void) { @@ -226,5 +297,11 @@ main(void) { test_fls_zu, test_fls_u_slow, test_fls_lu_slow, - test_fls_llu_slow); + test_fls_llu_slow, + test_popcount_u, + test_popcount_u_slow, + test_popcount_lu, + test_popcount_lu_slow, + test_popcount_llu, + test_popcount_llu_slow); } -- cgit v0.12 From e6c057ad35b0c83eef100bf0e125f75ebf8b5edc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 11:46:49 -0800 Subject: fb: implement assign in terms of a visitor. We'll reuse this visitor in the next commit. 
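
The shape of the refactor is easiest to see in isolation: one routine walks every group that a bit range touches and hands the callback a pointer to the group plus a mask of the in-range bits, so "assign these bits" and (in the next commit) "count these bits" differ only in the callback. The following standalone sketch uses toy types rather than the jemalloc code, and assumes the GCC/Clang __builtin_popcountll spelling for the counting visitor:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Toy 64-bit groups; fb_group_t plays this role in flat_bitmap.h. */
typedef uint64_t group_t;
#define GROUP_BITS 64

typedef void (*group_visitor_t)(void *ctx, group_t *g, group_t mask);

/* Call visit once per group overlapping [start, start + cnt). */
static void
visit_range(group_t *groups, size_t start, size_t cnt,
    group_visitor_t visit, void *ctx) {
        size_t ind = start / GROUP_BITS;
        size_t bit = start % GROUP_BITS;
        while (cnt > 0) {
                size_t n = GROUP_BITS - bit;
                if (n > cnt) {
                        n = cnt;
                }
                group_t mask = (n == GROUP_BITS) ? ~(group_t)0
                    : ((((group_t)1 << n) - 1) << bit);
                visit(ctx, &groups[ind], mask);
                cnt -= n;
                bit = 0;
                ind++;
        }
}

/* One visitor assigns the bits under the mask... */
static void
assign_visitor(void *ctx, group_t *g, group_t mask) {
        *g = (*(bool *)ctx) ? (*g | mask) : (*g & ~mask);
}

/* ...and another counts them, reusing the exact same range walk. */
static void
count_visitor(void *ctx, group_t *g, group_t mask) {
        /* Assumes the GCC/Clang builtin; a portable loop works too. */
        *(size_t *)ctx += (size_t)__builtin_popcountll(*g & mask);
}

In the patch, fb_visit_impl plays the role of visit_range (with the first/middle/last-group split kept for better codegen), and fb_set_range/fb_unset_range become thin wrappers that pass an assign visitor.
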
--- include/jemalloc/internal/flat_bitmap.h | 55 +++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 0faf447..9f1909e 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -71,21 +71,16 @@ fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { fb[group_ind] &= ~((fb_group_t)1 << bit_ind); } -JEMALLOC_ALWAYS_INLINE void -fb_assign_group_impl(fb_group_t *fb, size_t start, size_t cnt, bool val) { - assert(cnt > 0); - assert(start + cnt - 1 < FB_GROUP_BITS); - fb_group_t bits = ((~(fb_group_t)0) >> (FB_GROUP_BITS - cnt)) << start; - if (val) { - *fb |= bits; - } else { - *fb &= ~bits; - } -} +/* + * Some implementation details. This visitation function lets us apply a group + * visitor to each group in the bitmap (potentially modifying it). The mask + * indicates which bits are logically part of the visitation. + */ +typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); JEMALLOC_ALWAYS_INLINE void -fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, - bool val) { +fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, + size_t start, size_t cnt) { assert(start + cnt - 1 < nbits); size_t group_ind = start / FB_GROUP_BITS; size_t start_bit_ind = start % FB_GROUP_BITS; @@ -93,10 +88,8 @@ fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, * The first group is special; it's the only one we don't start writing * to from bit 0. */ - size_t first_group_cnt = - (start_bit_ind + cnt > FB_GROUP_BITS - ? FB_GROUP_BITS - start_bit_ind - : cnt); + size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS + ? FB_GROUP_BITS - start_bit_ind : cnt); /* * We can basically split affected words into: * - The first group, where we touch only the high bits @@ -106,32 +99,48 @@ fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, * this can lead to bad codegen for those middle words. */ /* First group */ - fb_assign_group_impl(&fb[group_ind], start_bit_ind, first_group_cnt, - val); + fb_group_t mask = ((~(fb_group_t)0) + >> (FB_GROUP_BITS - first_group_cnt)) + << start_bit_ind; + visit(ctx, &fb[group_ind], mask); + cnt -= first_group_cnt; group_ind++; /* Middle groups */ while (cnt > FB_GROUP_BITS) { - fb_assign_group_impl(&fb[group_ind], 0, FB_GROUP_BITS, val); + visit(ctx, &fb[group_ind], ~(fb_group_t)0); cnt -= FB_GROUP_BITS; group_ind++; } /* Last group */ if (cnt != 0) { - fb_assign_group_impl(&fb[group_ind], 0, cnt, val); + mask = (~(fb_group_t)0) >> (FB_GROUP_BITS - cnt); + visit(ctx, &fb[group_ind], mask); + } +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + bool val = *(bool *)ctx; + if (val) { + *fb |= mask; + } else { + *fb &= ~mask; } } /* Sets the cnt bits starting at position start. Must not have a 0 count. */ static inline void fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - fb_assign_impl(fb, nbits, start, cnt, true); + bool val = true; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } /* Unsets the cnt bits starting at position start. Must not have a 0 count. 
*/ static inline void fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - fb_assign_impl(fb, nbits, start, cnt, false); + bool val = false; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } /* -- cgit v0.12 From 54c94c1679899db53c4a1002256e8604bc60eb36 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 13:29:13 -0800 Subject: flat bitmap: add scount / ucount functions. These can compute the number or set or unset bits in a subrange of the bitmap. --- include/jemalloc/internal/flat_bitmap.h | 24 +++- test/unit/flat_bitmap.c | 236 +++++++++++++++++++++++++++++++- 2 files changed, 258 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 9f1909e..c8cf518 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -81,7 +81,8 @@ typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); JEMALLOC_ALWAYS_INLINE void fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, size_t start, size_t cnt) { - assert(start + cnt - 1 < nbits); + assert(cnt > 0); + assert(start + cnt <= nbits); size_t group_ind = start / FB_GROUP_BITS; size_t start_bit_ind = start % FB_GROUP_BITS; /* @@ -143,6 +144,27 @@ fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } +JEMALLOC_ALWAYS_INLINE void +fb_scount_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + size_t *scount = (size_t *)ctx; + *scount += popcount_lu(*fb & mask); +} + +/* Finds the number of set bit in the of length cnt starting at start. */ +JEMALLOC_ALWAYS_INLINE size_t +fb_scount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = 0; + fb_visit_impl(fb, nbits, &fb_scount_visitor, &scount, start, cnt); + return scount; +} + +/* Finds the number of unset bit in the of length cnt starting at start. */ +JEMALLOC_ALWAYS_INLINE size_t +fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = fb_scount(fb, nbits, start, cnt); + return cnt - scount; +} + /* * An implementation detail; find the first bit at position >= min_bit with the * value val. diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 2f360d3..f088379 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -576,6 +576,237 @@ TEST_BEGIN(test_iter_range_exhaustive) { } TEST_END +/* + * If all set bits in the bitmap are contiguous, in [set_start, set_end), + * returns the number of set bits in [scount_start, scount_end). + */ +static size_t +scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, + size_t scount_end) { + /* No overlap. */ + if (set_end <= scount_start || scount_end <= set_start) { + return 0; + } + /* set range contains scount range */ + if (set_start <= scount_start && set_end >= scount_end) { + return scount_end - scount_start; + } + /* scount range contains set range. */ + if (scount_start <= set_start && scount_end >= set_end) { + return set_end - set_start; + } + /* Partial overlap, with set range starting first. */ + if (set_start < scount_start && set_end < scount_end) { + return set_end - scount_start; + } + /* Partial overlap, with scount range starting first. */ + if (scount_start < set_start && scount_end < set_end) { + return scount_end - set_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. 
+ */ + unreachable(); +} + +static size_t +ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, + size_t ucount_end) { + /* No overlap. */ + if (set_end <= ucount_start || ucount_end <= set_start) { + return ucount_end - ucount_start; + } + /* set range contains ucount range */ + if (set_start <= ucount_start && set_end >= ucount_end) { + return 0; + } + /* ucount range contains set range. */ + if (ucount_start <= set_start && ucount_end >= set_end) { + return (ucount_end - ucount_start) - (set_end - set_start); + } + /* Partial overlap, with set range starting first. */ + if (set_start < ucount_start && set_end < ucount_end) { + return ucount_end - set_end; + } + /* Partial overlap, with ucount range starting first. */ + if (ucount_start < set_start && ucount_end < set_end) { + return set_start - ucount_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. + */ + unreachable(); +} + +static void +expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, + size_t set_end) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t scount_expected = scount_contiguous(set_start, + set_end, i, j); + size_t scount_computed = fb_scount(fb, nbits, i, cnt); + expect_zu_eq(scount_expected, scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + size_t ucount_expected = ucount_contiguous(set_start, + set_end, i, j); + size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); + assert_zu_eq(ucount_expected, ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + } + } +} + +static void +do_test_count_contiguous(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + + fb_init(fb, nbits); + + expect_count_match_contiguous(fb, nbits, 0, 0); + for (size_t i = 0; i < nbits; i++) { + fb_set(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, 0, i + 1); + } + + for (size_t i = 0; i < nbits; i++) { + fb_unset(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, i + 1, nbits); + } + + free(fb); +} + +TEST_BEGIN(test_count_contiguous_simple) { + enum {nbits = 300}; + fb_group_t fb[FB_NGROUPS(nbits)]; + fb_init(fb, nbits); + /* Just an arbitrary number. */ + size_t start = 23; + + fb_set_range(fb, nbits, start, 30 - start); + expect_count_match_contiguous(fb, nbits, start, 30); + + fb_set_range(fb, nbits, start, 40 - start); + expect_count_match_contiguous(fb, nbits, start, 40); + + fb_set_range(fb, nbits, start, 70 - start); + expect_count_match_contiguous(fb, nbits, start, 70); + + fb_set_range(fb, nbits, start, 120 - start); + expect_count_match_contiguous(fb, nbits, start, 120); + + fb_set_range(fb, nbits, start, 150 - start); + expect_count_match_contiguous(fb, nbits, start, 150); + + fb_set_range(fb, nbits, start, 200 - start); + expect_count_match_contiguous(fb, nbits, start, 200); + + fb_set_range(fb, nbits, start, 290 - start); + expect_count_match_contiguous(fb, nbits, start, 290); +} +TEST_END + +TEST_BEGIN(test_count_contiguous) { +#define NB(nbits) \ + /* This test is *particularly* slow in debug builds. 
*/ \ + if ((!config_debug && nbits < 300) || nbits < 150) { \ + do_test_count_contiguous(nbits); \ + } + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, + size_t nbits) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t odd_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t odd_scount_computed = fb_scount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_scount, odd_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t odd_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_ucount, odd_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t even_scount_computed = fb_scount(fb_even, nbits, + i, j - i); + assert_zu_eq(even_scount, even_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t even_ucount_computed = fb_ucount(fb_even, nbits, + i, j - i); + assert_zu_eq(even_ucount, even_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + } + } +} + +static void +do_test_count_alternating(size_t nbits) { + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb_even = malloc(sz); + fb_group_t *fb_odd = malloc(sz); + + fb_init(fb_even, nbits); + fb_init(fb_odd, nbits); + + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set(fb_even, nbits, i); + } else { + fb_set(fb_odd, nbits, i); + } + } + + expect_count_match_alternating(fb_even, fb_odd, nbits); + + free(fb_even); + free(fb_odd); +} + +TEST_BEGIN(test_count_alternating) { +#define NB(nbits) \ + do_test_count_alternating(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -586,5 +817,8 @@ main(void) { test_range_simple, test_empty_full, test_iter_range_simple, - test_iter_range_exhaustive); + test_iter_range_exhaustive, + test_count_contiguous_simple, + test_count_contiguous, + test_count_alternating); } -- cgit v0.12 From f51948d9e11046ed0b131767bad47879807e2d8b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 14:20:45 -0800 Subject: psset unit test: fix a bug. The next commit adds assertions that reveal a bug in the test code (double-free). Fix it. --- test/unit/psset.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index 4147729..6f35fa8 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -204,7 +204,11 @@ TEST_BEGIN(test_reuse) { expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_3], index_of_3, 3); - /* Free up a 4-page hole at the end. */ + /* + * Free up a 4-page hole at the end. Recall that the pages at offsets 0 + * and 1 mod 4 were freed above, so we just have to free the last + * allocations. 
+ */ ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); @@ -212,8 +216,6 @@ TEST_BEGIN(test_reuse) { /* Make sure we can satisfy an allocation at the very end of a slab. */ size_t index_of_4 = HUGEPAGE_PAGES - 4; - ps = test_psset_dalloc(&psset, &alloc[index_of_4]); - expect_ptr_null(ps, "Nonempty pageslab evicted"); err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); @@ -405,7 +407,8 @@ TEST_END /* * Fills in and inserts two pageslabs, with the first better than the second, * and each fully allocated (into the allocations in allocs and worse_allocs, - * each of which should be HUGEPAGE_PAGES long). + * each of which should be HUGEPAGE_PAGES long), except for a single free page + * at the end. * * (There's nothing magic about these numbers; it's just useful to share the * setup between the oldest fit and the insert/remove test). @@ -418,7 +421,7 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); /* * This pageslab would be better from an address-first-fit POV, but - * better from an age POV. + * worse from an age POV. */ hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); -- cgit v0.12 From a559caf74aa5421f608a59bd2d38da688b1f2572 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 14:21:36 -0800 Subject: hpdata: Strengthen assertions. Now that we have flat bitmap bit counting functions, we can easily assert that nfree is always correct. While we're tightening up this code, enforce consistency on API boundaries as well. 
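
The pattern being applied is the usual one for cached metadata: keep a cheap consistency predicate that recomputes the cached values from the ground truth (here, the flat bitmap of active pages), and assert it on the way into and out of every mutating entry point. A toy standalone version of the idea, not the jemalloc code (it uses the GCC popcount builtin for brevity):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Toy stand-in for hpdata: a busy-page bitmap plus a cached free count. */
typedef struct {
        uint64_t busy;   /* bit i set <=> page i is reserved */
        size_t nfree;    /* cached; must equal the number of clear bits */
} toy_pages_t;

static bool
toy_consistent(const toy_pages_t *t) {
        /* Recompute the cached value from the ground truth. */
        size_t clear = 64 - (size_t)__builtin_popcountll(t->busy);
        return clear == t->nfree;
}

static void
toy_reserve(toy_pages_t *t, unsigned page) {
        assert(toy_consistent(t));              /* On the way in... */
        assert(page < 64);
        assert(!((t->busy >> page) & 1));
        t->busy |= (uint64_t)1 << page;
        t->nfree--;
        assert(toy_consistent(t));              /* ...and on the way out. */
}

In the diff below, hpdata_consistent() gains exactly this kind of fb_ucount() check against nfree, and hpdata_init()/hpdata_reserve_alloc()/hpdata_unreserve() assert it at their boundaries.
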
--- include/jemalloc/internal/hpdata.h | 11 +++++++++-- src/hpdata.c | 9 ++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index e8433c5..fdd6673 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -114,8 +114,15 @@ hpdata_assert_empty(hpdata_t *hpdata) { static inline bool hpdata_consistent(hpdata_t *hpdata) { - return fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(hpdata); + if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + != hpdata_longest_free_range_get(hpdata)) { + return false; + } + if (fb_ucount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata_nfree_get(hpdata)) { + return false; + } + return true; } static inline void diff --git a/src/hpdata.c b/src/hpdata.c index 847eb9d..a242efe 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,7 +17,6 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) - void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); @@ -26,10 +25,13 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); fb_init(hpdata->active_pages, HUGEPAGE_PAGES); + + hpdata_assert_consistent(hpdata); } void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { + hpdata_assert_consistent(hpdata); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -93,12 +95,15 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { } hpdata_longest_free_range_set(hpdata, largest_unchosen_range); + hpdata_assert_consistent(hpdata); return (void *)( (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { + hpdata_assert_consistent(hpdata); + assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) >> LG_PAGE; @@ -119,4 +124,6 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); + + hpdata_assert_consistent(hpdata); } -- cgit v0.12 From 9522ae41d6167ea32a4b30ffcf0b21fc4db80c2b Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Sun, 6 Dec 2020 19:03:13 -0800 Subject: Move n_search outside of assert as reported by static analyzer --- src/background_thread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index d4f96b1..7302a30 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -185,7 +185,8 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, lb = target; npurge_lb = npurge; } - assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + ++n_search; } interval = decay_interval_ns * (ub + lb) / 2; label_done: -- cgit v0.12 From 0dfdd31e0fc69206b7198b52f4bd4a8eb805d8be Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 2 Dec 2020 17:17:28 -0800 Subject: Add tiny batch size to batch allocation stress test --- test/stress/batch_alloc.c | 162 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 29 deletions(-) diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 
b203e05..14a870e 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -1,10 +1,15 @@ #include "test/jemalloc_test.h" #include "test/bench.h" -#define BATCH (1000 * 1000) -#define HUGE_BATCH (100 * BATCH) -static void *batch_ptrs[HUGE_BATCH]; -static void *item_ptrs[HUGE_BATCH]; +#define TINY_BATCH 10 +#define TINY_BATCH_ITER (10 * 1000 * 1000) +#define HUGE_BATCH (1000 * 1000) +#define HUGE_BATCH_ITER 100 +#define LEN (100 * 1000 * 1000) +static void *batch_ptrs[LEN]; +static size_t batch_ptrs_next = 0; +static void *item_ptrs[LEN]; +static size_t item_ptrs_next = 0; #define SIZE 7 @@ -18,7 +23,8 @@ struct batch_alloc_packet_s { static void batch_alloc_wrapper(size_t batch) { - batch_alloc_packet_t batch_alloc_packet = {batch_ptrs, batch, SIZE, 0}; + batch_alloc_packet_t batch_alloc_packet = + {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, @@ -28,14 +34,14 @@ batch_alloc_wrapper(size_t batch) { static void item_alloc_wrapper(size_t batch) { - for (size_t i = 0; i < batch; ++i) { + for (size_t i = item_ptrs_next, end = i + batch; i < end; ++i) { item_ptrs[i] = malloc(SIZE); } } static void -release_and_clear(void **ptrs, size_t batch) { - for (size_t i = 0; i < batch; ++i) { +release_and_clear(void **ptrs, size_t len) { + for (size_t i = 0; i < len; ++i) { void *p = ptrs[i]; assert_ptr_not_null(p, "allocation failed"); sdallocx(p, SIZE, 0); @@ -44,45 +50,143 @@ release_and_clear(void **ptrs, size_t batch) { } static void -batch_alloc_small_can_repeat() { - batch_alloc_wrapper(BATCH); - release_and_clear(batch_ptrs, BATCH); +batch_alloc_without_free(size_t batch) { + batch_alloc_wrapper(batch); + batch_ptrs_next += batch; } static void -item_alloc_small_can_repeat() { - item_alloc_wrapper(BATCH); - release_and_clear(item_ptrs, BATCH); +item_alloc_without_free(size_t batch) { + item_alloc_wrapper(batch); + item_ptrs_next += batch; } -TEST_BEGIN(test_small_batch_with_free) { - compare_funcs(10, 100, - "batch allocation", batch_alloc_small_can_repeat, - "item allocation", item_alloc_small_can_repeat); +static void +batch_alloc_with_free(size_t batch) { + batch_alloc_wrapper(batch); + release_and_clear(batch_ptrs + batch_ptrs_next, batch); + batch_ptrs_next += batch; +} + +static void +item_alloc_with_free(size_t batch) { + item_alloc_wrapper(batch); + release_and_clear(item_ptrs + item_ptrs_next, batch); + item_ptrs_next += batch; +} + +static void +compare_without_free(size_t batch, size_t iter, + void (*batch_alloc_without_free_func)(void), + void (*item_alloc_without_free_func)(void)) { + assert(batch_ptrs_next == 0); + assert(item_ptrs_next == 0); + assert(batch * iter <= LEN); + for (size_t i = 0; i < iter; ++i) { + batch_alloc_without_free_func(); + item_alloc_without_free_func(); + } + release_and_clear(batch_ptrs, batch_ptrs_next); + batch_ptrs_next = 0; + release_and_clear(item_ptrs, item_ptrs_next); + item_ptrs_next = 0; + compare_funcs(0, iter, + "batch allocation", batch_alloc_without_free_func, + "item allocation", item_alloc_without_free_func); + release_and_clear(batch_ptrs, batch_ptrs_next); + batch_ptrs_next = 0; + release_and_clear(item_ptrs, item_ptrs_next); + item_ptrs_next = 0; +} + +static void +compare_with_free(size_t batch, size_t iter, + void (*batch_alloc_with_free_func)(void), + void (*item_alloc_with_free_func)(void)) { + assert(batch_ptrs_next == 0); + assert(item_ptrs_next == 0); + assert(batch * iter <= LEN); + for (size_t i = 0; i < 
iter; ++i) { + batch_alloc_with_free_func(); + item_alloc_with_free_func(); + } + batch_ptrs_next = 0; + item_ptrs_next = 0; + compare_funcs(0, iter, + "batch allocation", batch_alloc_with_free_func, + "item allocation", item_alloc_with_free_func); + batch_ptrs_next = 0; + item_ptrs_next = 0; +} + +static void +batch_alloc_without_free_tiny() { + batch_alloc_without_free(TINY_BATCH); +} + +static void +item_alloc_without_free_tiny() { + item_alloc_without_free(TINY_BATCH); +} + +TEST_BEGIN(test_tiny_batch_without_free) { + compare_without_free(TINY_BATCH, TINY_BATCH_ITER, + batch_alloc_without_free_tiny, item_alloc_without_free_tiny); +} +TEST_END + +static void +batch_alloc_with_free_tiny() { + batch_alloc_with_free(TINY_BATCH); +} + +static void +item_alloc_with_free_tiny() { + item_alloc_with_free(TINY_BATCH); +} + +TEST_BEGIN(test_tiny_batch_with_free) { + compare_with_free(TINY_BATCH, TINY_BATCH_ITER, + batch_alloc_with_free_tiny, item_alloc_with_free_tiny); } TEST_END static void -batch_alloc_huge_cannot_repeat() { - batch_alloc_wrapper(HUGE_BATCH); +batch_alloc_without_free_huge() { + batch_alloc_without_free(HUGE_BATCH); } static void -item_alloc_huge_cannot_repeat() { - item_alloc_wrapper(HUGE_BATCH); +item_alloc_without_free_huge() { + item_alloc_without_free(HUGE_BATCH); } TEST_BEGIN(test_huge_batch_without_free) { - compare_funcs(0, 1, - "batch allocation", batch_alloc_huge_cannot_repeat, - "item allocation", item_alloc_huge_cannot_repeat); - release_and_clear(batch_ptrs, HUGE_BATCH); - release_and_clear(item_ptrs, HUGE_BATCH); + compare_without_free(HUGE_BATCH, HUGE_BATCH_ITER, + batch_alloc_without_free_huge, item_alloc_without_free_huge); +} +TEST_END + +static void +batch_alloc_with_free_huge() { + batch_alloc_with_free(HUGE_BATCH); +} + +static void +item_alloc_with_free_huge() { + item_alloc_with_free(HUGE_BATCH); +} + +TEST_BEGIN(test_huge_batch_with_free) { + compare_with_free(HUGE_BATCH, HUGE_BATCH_ITER, + batch_alloc_with_free_huge, item_alloc_with_free_huge); } TEST_END int main(void) { return test_no_reentrancy( - test_small_batch_with_free, - test_huge_batch_without_free); + test_tiny_batch_without_free, + test_tiny_batch_with_free, + test_huge_batch_without_free, + test_huge_batch_with_free); } -- cgit v0.12 From e82771807ec33c6a7db7612158cbfb9af87818b9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 2 Dec 2020 17:09:59 -0800 Subject: Cache mallctl mib for batch allocation stress test --- test/stress/batch_alloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 14a870e..427e1cb 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -1,6 +1,10 @@ #include "test/jemalloc_test.h" #include "test/bench.h" +#define MIBLEN 8 +static size_t mib[MIBLEN]; +static size_t miblen = MIBLEN; + #define TINY_BATCH 10 #define TINY_BATCH_ITER (10 * 1000 * 1000) #define HUGE_BATCH (1000 * 1000) @@ -27,7 +31,7 @@ batch_alloc_wrapper(size_t batch) { {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); - assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + assert_d_eq(mallctlbymib(mib, miblen, &filled, &len, &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); assert_zu_eq(filled, batch, ""); } @@ -184,6 +188,8 @@ TEST_BEGIN(test_huge_batch_with_free) { TEST_END int main(void) { + assert_d_eq(mallctlnametomib("experimental.batch_alloc", mib, &miblen), + 0, ""); return test_no_reentrancy( test_tiny_batch_without_free, 
test_tiny_batch_with_free, -- cgit v0.12 From 1e3b8636ff02fa2150cd84720727d300455b4c63 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 09:39:27 -0800 Subject: HPA: Remove unused malloc_conf options. --- include/jemalloc/internal/jemalloc_internal_externs.h | 3 --- src/ctl.c | 9 --------- src/jemalloc.c | 18 ------------------ src/stats.c | 3 --- test/unit/mallctl.c | 3 --- 5 files changed, 36 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 814a7a1..fb8dc3f 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -13,10 +13,7 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; extern bool opt_hpa; -extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; -extern size_t opt_hpa_small_max; -extern size_t opt_hpa_large_min; extern size_t opt_hpa_sec_max_alloc; extern size_t opt_hpa_sec_max_bytes; diff --git a/src/ctl.c b/src/ctl.c index 4266e4b..f113742 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -91,10 +91,7 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) -CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) -CTL_PROTO(opt_hpa_small_max) -CTL_PROTO(opt_hpa_large_min) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -369,10 +366,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, - {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, - {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, - {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -1920,10 +1914,7 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) -CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) -CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 30c2fe1..c2817cf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -138,9 +138,6 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; size_t opt_hpa_slab_max_alloc = 256 * 1024; -size_t opt_hpa_slab_goal = 128 * 1024; -size_t opt_hpa_small_max = 32 * 1024; -size_t opt_hpa_large_min = 4 * 1024 * 1024; size_t opt_hpa_sec_max_alloc = 32 * 1024; /* These settings correspond to a maximum of 1MB cached per arena. */ @@ -1506,21 +1503,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - /* - * These no longer have any effect, but various - * non-public test configs set them as we iterate on HPA - * development. 
We parse and report them for now, but - * they don't affect behavior. Eventually they'll be - * removed. - */ - CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", - PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, - true) - CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); diff --git a/src/stats.c b/src/stats.c index aab9fb5..ab440c4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1248,10 +1248,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") - OPT_WRITE_SIZE_T("hpa_slab_goal") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") - OPT_WRITE_SIZE_T("hpa_small_max") - OPT_WRITE_SIZE_T("hpa_large_min") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d4e2621..72dc0f3 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,10 +164,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); - TEST_MALLCTL_OPT(size_t, hpa_slab_goal, always); TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); - TEST_MALLCTL_OPT(size_t, hpa_small_max, always); - TEST_MALLCTL_OPT(size_t, hpa_large_min, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); -- cgit v0.12 From 986cbe4881609f46897915e75a1e58971a814d84 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 2 Nov 2020 16:15:14 -0800 Subject: Disable JEMALLOC_TLS for QNX TLS access triggers recurisive malloc during bootstrapping. Need to use pthread_getspecific and pthread_setspecific with a follow up fix. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 8284e87..468c025 100644 --- a/configure.ac +++ b/configure.ac @@ -765,6 +765,10 @@ case "${host}" in default_retain="1" fi ;; + *-*-nto-qnx) + abi="elf" + force_tls="0" + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" -- cgit v0.12 From 96a59c3bb59a1d725c266019ca0acf0bc28ff1a5 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 29 Oct 2020 18:28:35 -0700 Subject: Fix recursive malloc during bootstrap on QNX pthread_key_create on QNX triggers recursive allocation during tsd bootstrapping. Using tsd_init_check_recursion to detect that. Before pthread_key_create, the address of tsd_boot_wrapper is returned from tsd_get_wrapper instead of using TLS to store the pointer. tsd_set_wrapper becomes a no-op. After that, the address of tsd_boot_wrapper is written to TLS and bootstrap continues as before. 
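
Put differently, the fix applies a standard TLS bootstrap pattern: until pthread_key_create() has succeeded and a booted flag is set, the getter hands back a static boot wrapper and the setter is a no-op, so any allocation triggered while the key is being created never touches the not-yet-existing key. A simplified standalone sketch of that pattern follows; the names are illustrative and it omits the tsd_init_check_recursion bookkeeping that the real change also relies on:

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy version of the tsd_generic.h scheme; names are illustrative. */
typedef struct { int value; } wrapper_t;

static pthread_key_t wrapper_key;
static bool wrapper_booted = false;
static wrapper_t boot_wrapper = {0};

static wrapper_t *
wrapper_get(void) {
        if (!wrapper_booted) {
                /*
                 * pthread_key_create() may allocate; if that allocation
                 * re-enters here, it gets the static wrapper instead of
                 * touching a key that does not exist yet.
                 */
                return &boot_wrapper;
        }
        return (wrapper_t *)pthread_getspecific(wrapper_key);
}

static void
wrapper_set(wrapper_t *wrapper) {
        if (!wrapper_booted) {
                return;  /* No key yet; setting is a no-op. */
        }
        (void)pthread_setspecific(wrapper_key, wrapper);
}

static bool
wrapper_boot(void) {
        if (pthread_key_create(&wrapper_key, NULL) != 0) {
                return true;  /* Failure. */
        }
        /* Only after this flip do get/set touch the real key. */
        wrapper_booted = true;
        wrapper_set(&boot_wrapper);
        return false;
}
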
Signed-off-by: Jin Qian --- include/jemalloc/internal/tsd_generic.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index cf73c0c..a718472 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -52,6 +52,9 @@ tsd_cleanup_wrapper(void *arg) { JEMALLOC_ALWAYS_INLINE void tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (unlikely(!tsd_booted)) { + return; + } if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { malloc_write(": Error setting TSD\n"); abort(); @@ -60,7 +63,13 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { - tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + tsd_wrapper_t *wrapper; + + if (unlikely(!tsd_booted)) { + return &tsd_boot_wrapper; + } + + wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); if (init && unlikely(wrapper == NULL)) { tsd_init_block_t block; @@ -91,11 +100,21 @@ tsd_wrapper_get(bool init) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { + tsd_wrapper_t *wrapper; + tsd_init_block_t block; + + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return false; + } + block.data = &tsd_boot_wrapper; if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { return true; } - tsd_wrapper_set(&tsd_boot_wrapper); tsd_booted = true; + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_init_finish(&tsd_init_head, &block); return false; } -- cgit v0.12 From 26c1dc5a3aa49e95bfdf5af0d01d784a67edf0cb Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 13:54:36 -0700 Subject: Support AutoConf for posix_madvise and POSIX_MADV_DONTNEED --- configure.ac | 20 ++++++++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 11 +++++++++++ 2 files changed, 31 insertions(+) diff --git a/configure.ac b/configure.ac index 468c025..e7430d8 100644 --- a/configure.ac +++ b/configure.ac @@ -2204,6 +2204,26 @@ case "${host_cpu}" in fi ;; esac +else + dnl Check for posix_madvise. + JE_COMPILABLE([posix_madvise], [ + #include + ], [ + posix_madvise((void *)0, 0, 0); + ], [je_cv_posix_madvise]) + if test "x${je_cv_posix_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ]) + + dnl Check for posix_madvise(..., POSIX_MADV_DONTNEED). + JE_COMPILABLE([posix_madvise(..., POSIX_MADV_DONTNEED)], [ + #include + ], [ + posix_madvise((void *)0, 0, POSIX_MADV_DONTNEED); + ], [je_cv_posix_madv_dontneed]) + if test "x${je_cv_posix_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ]) + fi + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index ff0e15b..dc4f01f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -318,6 +318,17 @@ */ #undef JEMALLOC_THP +/* Defined if posix_madvise is available. */ +#undef JEMALLOC_HAVE_POSIX_MADVISE + +/* + * Method for purging unused pages using posix_madvise. 
+ * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS + /* * Defined if memcntl page admin call is supported */ -- cgit v0.12 From 4e3fe218e90c125a3d9616a0b50e8ccb506e9a44 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 14:09:05 -0700 Subject: Use posix_madvise to purge pages when available --- src/pages.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pages.c b/src/pages.c index 59a03f2..b23c9e9 100644 --- a/src/pages.c +++ b/src/pages.c @@ -332,6 +332,9 @@ pages_purge_lazy(void *addr, size_t size) { #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ + !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #else not_reached(); #endif @@ -349,6 +352,9 @@ pages_purge_forced(void *addr, size_t size) { #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. */ return pages_commit(addr, size); -- cgit v0.12 From 063a767ffe453624a1d4c5b26115efcc1ea5f2e1 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 14:36:07 -0700 Subject: Define JEMALLOC_HAS_ALLOCA_H for QNX QNX has --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index e7430d8..eba3e78 100644 --- a/configure.ac +++ b/configure.ac @@ -768,6 +768,7 @@ case "${host}" in *-*-nto-qnx) abi="elf" force_tls="0" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) -- cgit v0.12 From 91e006c4c2c523f185077015e66d99f862165262 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 09:56:53 -0700 Subject: Enable ctl_lookup() to start from arbitrary node --- src/ctl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f113742..f7ed148 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1328,8 +1328,8 @@ label_return: } static int -ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp) { +ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, + const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; size_t elen, i, j; @@ -1343,7 +1343,7 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, ret = ENOENT; goto label_return; } - node = super_root_node; + node = starting_node; for (i = 0; i < *depthp; i++) { assert(node); assert(node->nchildren > 0); @@ -1440,7 +1440,8 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, nodes, mib, + &depth); if (ret != 0) { goto label_return; } @@ -1466,7 +1467,8 @@ ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { goto label_return; } - ret = ctl_lookup(tsd_tsdn(tsd), name, NULL, mibp, miblenp); + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, NULL, 
mibp, + miblenp); label_return: return(ret); } -- cgit v0.12 From 3a627b9674a9d12413b01be8c4e7d2d2bf4965e7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 10:08:42 -0700 Subject: No need to record all nodes in ctl_lookup() --- src/ctl.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f7ed148..6240220 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1329,7 +1329,8 @@ label_return: static int ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, - const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { + const char *name, const ctl_named_node_t **ending_nodep, size_t *mibp, + size_t *depthp) { int ret; const char *elm, *tdot, *dot; size_t elen, i, j; @@ -1357,10 +1358,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, if (strlen(child->name) == elen && strncmp(elm, child->name, elen) == 0) { node = child; - if (nodesp != NULL) { - nodesp[i] = - (const ctl_node_t *)node; - } mibp[i] = j; break; } @@ -1387,9 +1384,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, goto label_return; } - if (nodesp != NULL) { - nodesp[i] = (const ctl_node_t *)node; - } mibp[i] = (size_t)index; } @@ -1419,6 +1413,9 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, strchr(elm, '\0'); elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); } + if (ending_nodep != NULL) { + *ending_nodep = node; + } ret = 0; label_return: @@ -1430,7 +1427,6 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t depth; - ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; @@ -1440,13 +1436,12 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, nodes, mib, + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, &node, mib, &depth); if (ret != 0) { goto label_return; } - node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) { ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); } else { -- cgit v0.12 From 6ab181d2b72ece43cb6bcc706172ff8f0fe7dd51 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 10:36:00 -0700 Subject: Extract node lookup given mib input --- src/ctl.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 6240220..7bb6c1d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1468,21 +1468,13 @@ label_return: return(ret); } -int -ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +static int +ctl_lookupbymib(tsdn_t *tsdn, const ctl_named_node_t **ending_nodep, + const size_t *mib, size_t miblen) { int ret; - const ctl_named_node_t *node; - size_t i; - - if (!ctl_initialized && ctl_init(tsd)) { - ret = EAGAIN; - goto label_return; - } - /* Iterate down the tree. */ - node = super_root_node; - for (i = 0; i < miblen; i++) { + const ctl_named_node_t *node = super_root_node; + for (size_t i = 0; i < miblen; i++) { assert(node); assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { @@ -1497,13 +1489,36 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Indexed element. 
*/ inode = ctl_indexed_node(node->children); - node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); + node = inode->index(tsdn, mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; } } } + assert(ending_nodep != NULL); + *ending_nodep = node; + ret = 0; + +label_return: + return(ret); +} + +int +ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } /* Call the ctl function. */ if (node && node->ctl) { -- cgit v0.12 From f2e1a5be776de0a4d12c03820bcb5fb0d475d756 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 13:26:44 -0700 Subject: Do not fail on partial ctl path for ctl_nametomib() We do not fail on partial ctl path when the given `mib` array is shorter than the given name, and we should keep the behavior the same in the reverse case, which I feel is also the more natural way. --- src/ctl.c | 8 ++------ test/unit/mallctl.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 7bb6c1d..d139e6e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1387,7 +1387,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, mibp[i] = (size_t)index; } - if (node->ctl != NULL) { + /* Reached the end? */ + if (node->ctl != NULL || *dot == '\0') { /* Terminal node. */ if (*dot != '\0') { /* @@ -1403,11 +1404,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, } /* Update elm. */ - if (*dot == '\0') { - /* No more elements. */ - ret = ENOENT; - goto label_return; - } elm = &dot[1]; dot = ((tdot = strchr(elm, '.')) != NULL) ? 
tdot : strchr(elm, '\0'); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 72dc0f3..3cd0c4d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -117,6 +117,20 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) { } TEST_END +TEST_BEGIN(test_mallctlnametomib_short_name) { + size_t mib[4]; + size_t miblen; + + miblen = 4; + mib[3] = 42; + expect_d_eq(mallctlnametomib("arenas.bin.0", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + expect_zu_eq(miblen, 3, "Unexpected mib output length"); + expect_zu_eq(mib[3], 42, + "mallctlnametomib() wrote past the end of the input mib"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1106,6 +1120,7 @@ main(void) { test_mallctlbymib_errors, test_mallctl_read_write, test_mallctlnametomib_short_mib, + test_mallctlnametomib_short_name, test_mallctl_config, test_mallctl_opt, test_manpage_example, -- cgit v0.12 From 006dd0414e6356ee76218ca6b2db960fc671df16 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 11:28:22 -0700 Subject: Add partial name-to-mib functionality --- include/jemalloc/internal/ctl.h | 3 ++- src/ctl.c | 30 ++++++++++++++++++++++ test/unit/mallctl.c | 57 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a6ae05c..e124977 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -98,9 +98,10 @@ typedef struct ctl_arenas_s { int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); - int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp); bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index d139e6e..307fd2d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1528,6 +1528,36 @@ label_return: return(ret); } +int +ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } + if (node == NULL || node->ctl != NULL) { + ret = ENOENT; + goto label_return; + } + + assert(miblenp != NULL); + assert(*miblenp >= miblen); + *miblenp -= miblen; + ret = ctl_lookup(tsd_tsdn(tsd), node, name, NULL, mib + miblen, + miblenp); + *miblenp += miblen; +label_return: + return(ret); +} + bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3cd0c4d..7dfc344 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1,5 +1,6 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/util.h" @@ -131,6 +132,61 @@ TEST_BEGIN(test_mallctlnametomib_short_name) { } TEST_END +TEST_BEGIN(test_mallctlmibnametomib) { + size_t mib[4]; + size_t miblen = 4; + uint32_t result, result_ref; + size_t len_result = sizeof(uint32_t); + + tsd_t *tsd = tsd_fetch(); + + /* Error cases */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "bob", &miblen), ENOENT, 
""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "arenas", &miblen), 0, ""); + assert_zu_eq(miblen, 1, ""); + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 1, "bin", &miblen), 0, ""); + assert_zu_eq(miblen, 2, ""); + expect_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + ENOENT, "mallctlbymib() should fail on partial path"); + + /* Error cases. */ + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "bob", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "0", &miblen), 0, ""); + assert_zu_eq(miblen, 3, ""); + expect_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + ENOENT, "mallctlbymib() should fail on partial path"); + + /* Error cases. */ + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "bob", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "nregs", &miblen), 0, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, + NULL, 0), 0, "Unexpected mallctl() failure"); + expect_zu_eq(result, result_ref, + "mallctlbymib() and mallctl() returned different result"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1121,6 +1177,7 @@ main(void) { test_mallctl_read_write, test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, + test_mallctlmibnametomib, test_mallctl_config, test_mallctl_opt, test_manpage_example, -- cgit v0.12 From 4557c0a67d8804945935b99b5c493d257be71b43 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 15:26:46 -0700 Subject: Enable ctl on partial mib and partial name --- include/jemalloc/internal/ctl.h | 2 ++ src/ctl.c | 45 ++++++++++++++++++++++++++++++++++++ test/unit/mallctl.c | 51 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e124977..174b9f7 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -102,6 +102,8 @@ int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp); +int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index 307fd2d..0f1f652 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1558,6 +1558,51 @@ label_return: return(ret); } +int +ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = 
ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } + if (node == NULL || node->ctl != NULL) { + ret = ENOENT; + goto label_return; + } + + assert(miblenp != NULL); + assert(*miblenp >= miblen); + *miblenp -= miblen; + /* + * The same node supplies the starting node and stores the ending node. + */ + ret = ctl_lookup(tsd_tsdn(tsd), node, name, &node, mib + miblen, + miblenp); + *miblenp += miblen; + if (ret != 0) { + goto label_return; + } + + if (node != NULL && node->ctl) { + ret = node->ctl(tsd, mib, *miblenp, oldp, oldlenp, newp, + newlen); + } else { + /* The name refers to a partial path through the ctl tree. */ + ret = ENOENT; + } + +label_return: + return(ret); +} + bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7dfc344..3d5b278 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -187,6 +187,56 @@ TEST_BEGIN(test_mallctlmibnametomib) { } TEST_END +TEST_BEGIN(test_mallctlbymibname) { + size_t mib[4]; + size_t miblen = 4; + uint32_t result, result_ref; + size_t len_result = sizeof(uint32_t); + + tsd_t *tsd = tsd_fetch(); + + /* Error cases. */ + + assert_d_eq(mallctlnametomib("arenas", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + assert_zu_eq(miblen, 1, ""); + + miblen = 4; + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, + &result, &len_result, NULL, 0), ENOENT, ""); + miblen = 4; + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen, + &result, &len_result, NULL, 0), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid cases. */ + + assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, + NULL, 0), 0, "Unexpected mallctl() failure"); + miblen = 4; + + assert_d_eq(ctl_bymibname(tsd, mib, 0, "arenas.bin.0.nregs", &miblen, + &result, &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 2, "0.nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 3, "nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1178,6 +1228,7 @@ main(void) { test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, test_mallctlmibnametomib, + test_mallctlbymibname, test_mallctl_config, test_mallctl_opt, test_manpage_example, -- cgit v0.12 From 74bd63b2034c5f25bbc1fdf46095dfed08fdd2a5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 17 Dec 2020 14:01:56 -0800 Subject: Optimize stats print using partial name-to-mib --- include/jemalloc/internal/ctl.h | 19 ++++ src/stats.c | 246 +++++++++++++++++++++------------------- 2 files changed, 151 insertions(+), 114 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 174b9f7..63d27f8 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -137,4 +137,23 @@ void ctl_mtx_assert_held(tsdn_t *tsdn); } \ } while (0) +#define xmallctlmibnametomib(mib, miblen, name, 
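[Editor's aside, not part of the patch series: putting the two new internal entry points together, the intended calling pattern looks roughly like the following. This is a sketch only; it assumes the jemalloc-internal context (ctl.h, tsd_fetch(), CTL_MAX_DEPTH) that is available inside the library and its tests, and the wrapper function name is illustrative.]

/* Sketch: resolve a constant prefix once, then complete it per query. */
static uint32_t
read_bin0_nregs_via_prefix(void) {
	size_t mib[CTL_MAX_DEPTH];
	size_t miblen = CTL_MAX_DEPTH;
	tsd_t *tsd = tsd_fetch();

	/* Resolve the constant prefix; miblen returns as its depth (2). */
	if (ctl_mibnametomib(tsd, mib, 0, "arenas.bin", &miblen) != 0) {
		return 0;
	}

	/* Later: complete the path and read the value in a single call. */
	uint32_t nregs = 0;
	size_t sz = sizeof(nregs);
	size_t depth = CTL_MAX_DEPTH;
	ctl_bymibname(tsd, mib, 2, "0.nregs", &depth, &nregs, &sz, NULL, 0);
	/* depth is now 4, the full depth of "arenas.bin.0.nregs". */
	return nregs;
}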
miblenp) do { \ + if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ + != 0) { \ + malloc_write( \ + ": Failure in ctl_mibnametomib()\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp, \ + newp, newlen) do { \ + if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ + oldp, oldlenp, newp, newlen) != 0) { \ + malloc_write( \ + ": Failure in ctl_bymibname()\n"); \ + abort(); \ + } \ +} while (0) + #endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/src/stats.c b/src/stats.c index ab440c4..999ba9f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -24,32 +24,28 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) -#define CTL_M2_GET(n, i, v, t) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ +#define CTL_LEAF_PREPARE(mib, miblen, name) do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ + assert(miblen_new > miblen); \ } while (0) -#define CTL_M2_M4_GET(n, i, j, v, t) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ +#define CTL_LEAF(mib, miblen, leaf, v, t) do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ - mib[4] = (j); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ + xmallctlbymibname(mib, miblen, leaf, &miblen_new, (void *)v, \ + &sz, NULL, 0); \ + assert(miblen_new == miblen + 1); \ } while (0) -#define CTL_M2_M5_GET(n, i, j, v, t) do { \ +#define CTL_M2_GET(n, i, v, t) do { \ size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ - mib[5] = (j); \ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) @@ -107,13 +103,6 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { return false; } -#define MUTEX_CTL_STR_MAX_LENGTH 128 -static void -gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, - const char *mutex, const char *counter) { - malloc_snprintf(str, buf_len, "stats.%s.%s.%s", prefix, mutex, counter); -} - static void mutex_stats_init_cols(emitter_row_t *row, const char *table_name, emitter_col_t *name, @@ -150,11 +139,13 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, } static void -mutex_stats_read_global(const char *name, emitter_col_t *col_name, +mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, + emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + CTL_LEAF_PREPARE(mib, miblen, name); + size_t miblen_name = miblen + 1; col_name->str_val = name; @@ -165,10 +156,8 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, \ - counter_type); \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); 
\ } else { \ emitter_col_t *base = \ &col_##counter_type[mutex_counter_##base_counter]; \ @@ -183,12 +172,13 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, } static void -mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, - const char *name, emitter_col_t *col_name, +mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, + emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + CTL_LEAF_PREPARE(mib, miblen, name); + size_t miblen_name = miblen + 1; col_name->str_val = name; @@ -199,10 +189,7 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[mutex_ind], \ - #counter); \ - CTL_M2_GET(cmd, arena_ind, \ + CTL_LEAF(mib, miblen_name, #counter, \ (counter_type *)&dst->bool_val, counter_type); \ } else { \ emitter_col_t *base = \ @@ -218,11 +205,13 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, } static void -mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, +mutex_stats_read_arena_bin(size_t mib[], size_t miblen, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + CTL_LEAF_PREPARE(mib, miblen, "mutex"); + size_t miblen_mutex = miblen + 1; + emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 @@ -231,9 +220,7 @@ mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #counter); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + CTL_LEAF(mib, miblen_mutex, #counter, \ (counter_type *)&dst->bool_val, counter_type); \ } else { \ emitter_col_t *base = \ @@ -362,6 +349,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "bins"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "bins"); + + size_t arenas_bin_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -371,8 +366,11 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; - CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs, - uint64_t); + stats_arenas_mib[4] = j; + arenas_bin_mib[2] = j; + + CTL_LEAF(stats_arenas_mib, 5, "nslabs", &nslabs, uint64_t); + in_gap_prev = in_gap; in_gap = (nslabs == 0); @@ -381,33 +379,25 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti " ---\n"); } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); - CTL_M2_GET("arenas.bin.0.nshards", j, &nshards, uint32_t); - - 
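[Editor's aside, not part of the patch series: the transformation in this file follows one pattern, resolve the constant part of a stats path once and then rewrite only the index slots. The same idea expressed against the public API for comparison; the patch itself uses the internal CTL_LEAF helpers above, and the function name here is illustrative.]

#include <stdint.h>
#include <jemalloc/jemalloc.h>

/* Read stats.arenas.<arena_ind>.bins.<j>.nmalloc for each bin without
 * re-parsing the name on every iteration. */
static void
read_bin_nmalloc(unsigned arena_ind, unsigned nbins, uint64_t *out) {
	size_t mib[6];
	size_t miblen = sizeof(mib) / sizeof(mib[0]);

	if (mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib, &miblen)
	    != 0) {
		return;
	}
	mib[2] = arena_ind;
	for (unsigned j = 0; j < nbins; j++) {
		size_t sz = sizeof(uint64_t);

		mib[4] = j;
		mallctlbymib(mib, miblen, &out[j], &sz, NULL, 0);
	}
}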
CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + CTL_LEAF(arenas_bin_mib, 3, "size", ®_size, size_t); + CTL_LEAF(arenas_bin_mib, 3, "nregs", &nregs, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "slab_size", &slab_size, size_t); + CTL_LEAF(arenas_bin_mib, 3, "nshards", &nshards, uint32_t); + CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "curregs", &curregs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, &nfills, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, &nflushes, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, &nreslabs, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs, + CTL_LEAF(stats_arenas_mib, 5, "nfills", &nfills, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nflushes", &nflushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nreslabs", &nreslabs, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "curslabs", &curslabs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); if (mutex) { - mutex_stats_read_arena_bin(i, j, col_mutex64, - col_mutex32, uptime); + mutex_stats_read_arena_bin(stats_arenas_mib, 5, + col_mutex64, col_mutex32, uptime); } emitter_json_object_begin(emitter); @@ -524,16 +514,26 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "lextents"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "lextents"); + + size_t arenas_lextent_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; - CTL_M2_M4_GET("stats.arenas.0.lextents.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j, - &nrequests, uint64_t); + stats_arenas_mib[4] = j; + arenas_lextent_mib[2] = j; + + CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, + uint64_t); + in_gap_prev = in_gap; in_gap = (nrequests == 0); @@ -542,9 +542,9 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " ---\n"); } - CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, - &curlextents, size_t); + CTL_LEAF(arenas_lextent_mib, 3, "size", &lextent_size, size_t); + CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, + size_t); emitter_json_object_begin(emitter); emitter_json_kv(emitter, "curlextents", emitter_type_size, @@ -598,22 +598,27 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, 
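[Editor's aside, not part of the patch series: the tables assembled in this file are also reachable from application code through malloc_stats_print(). A minimal sketch; the "J" option selects JSON output, other filter characters are described in the manual, and the helper names are illustrative.]

#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
write_cb(void *cbopaque, const char *msg) {
	fputs(msg, (FILE *)cbopaque);
}

static void
dump_stats_json(FILE *f) {
	/* Passing NULL, NULL, NULL instead prints the plain-text report. */
	malloc_stats_print(write_cb, (void *)f, "J");
}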
"extents"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "extents"); + in_gap = false; for (j = 0; j < SC_NPSIZES; j++) { size_t ndirty, nmuzzy, nretained, total, dirty_bytes, muzzy_bytes, retained_bytes, total_bytes; - CTL_M2_M4_GET("stats.arenas.0.extents.0.ndirty", i, j, - &ndirty, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.nmuzzy", i, j, - &nmuzzy, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.nretained", i, j, - &nretained, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.dirty_bytes", i, j, - &dirty_bytes, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.muzzy_bytes", i, j, - &muzzy_bytes, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.retained_bytes", i, j, + stats_arenas_mib[4] = j; + + CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t); + CTL_LEAF(stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, + size_t); + CTL_LEAF(stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, + size_t); + CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes, size_t); + total = ndirty + nmuzzy + nretained; total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; @@ -737,29 +742,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "hpa_shard.nonfull_slabs"); + emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge", - i, j, &npageslabs_huge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge", - i, j, &nactive_huge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge", - i, j, &ninactive_huge, size_t); - - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge", - i, j, &npageslabs_nonhuge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge", - i, j, &nactive_nonhuge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge", - i, j, &ninactive_nonhuge, size_t); + stats_arenas_mib[5] = j; + + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", + &npageslabs_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", + &nactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ninactive_huge", + &ninactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", + &npageslabs_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", + &nactive_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ninactive_nonhuge", + &ninactive_nonhuge, size_t); bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); @@ -812,12 +817,17 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim emitter_json_object_kv_begin(emitter, "mutexes"); emitter_table_row(emitter, &row); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = arena_ind; + 
CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); - mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, - col32, uptime); + mutex_stats_read_arena(stats_arenas_mib, 4, name, &col_name, + col64, col32, uptime); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); /* Close the mutex dict. */ } @@ -1376,22 +1386,25 @@ stats_general_print(emitter_t *emitter) { */ if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "bin"); + size_t arenas_bin_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); for (unsigned i = 0; i < nbins; i++) { + arenas_bin_mib[2] = i; emitter_json_object_begin(emitter); - CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + CTL_LEAF(arenas_bin_mib, 3, "size", &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, &sv); - CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "nregs", &u32v, uint32_t); emitter_json_kv(emitter, "nregs", emitter_type_uint32, &u32v); - CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); + CTL_LEAF(arenas_bin_mib, 3, "slab_size", &sv, size_t); emitter_json_kv(emitter, "slab_size", emitter_type_size, &sv); - CTL_M2_GET("arenas.bin.0.nshards", i, &u32v, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "nshards", &u32v, uint32_t); emitter_json_kv(emitter, "nshards", emitter_type_uint32, &u32v); @@ -1407,10 +1420,13 @@ stats_general_print(emitter_t *emitter) { if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "lextent"); + size_t arenas_lextent_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); for (unsigned i = 0; i < nlextents; i++) { + arenas_lextent_mib[2] = i; emitter_json_object_begin(emitter); - CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); + CTL_LEAF(arenas_lextent_mib, 3, "size", &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, &sv); @@ -1510,9 +1526,11 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_M2_GET("stats.arenas.0.uptime", 0, &uptime, uint64_t); + size_t stats_mutexes_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_mutexes_mib, 0, "stats.mutexes"); for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_read_global(global_mutex_names[i], &name, - col64, col32, uptime); + mutex_stats_read_global(stats_mutexes_mib, 2, + global_mutex_names[i], &name, col64, col32, uptime); emitter_json_object_kv_begin(emitter, global_mutex_names[i]); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); -- cgit v0.12 From ea013d8fa4eaa0a3d1fa1c15e8506a32f4e70475 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 25 Aug 2020 11:31:58 -0700 Subject: Enforce realloc sizing stability --- src/jemalloc.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c2817cf..8384cfc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3391,18 +3391,18 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, size_t *usize, bool zero, tcache_t *tcache, + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, emap_alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; 
prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, *usize); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, - *usize, alignment, zero, tcache, arena, tctx, hook_args); + usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, zero, tcache, arena, hook_args); @@ -3411,22 +3411,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx); return NULL; } - - if (p == old_ptr && alignment != 0) { - /* - * The allocation did not move, so it is possible that the size - * class is smaller than would guarantee the requested - * alignment, and that the alignment constraint was - * serendipitously satisfied. Additionally, old_usize may not - * be the same as the current usize because of in-place large - * reallocation. Therefore, query the actual value of usize. - */ - assert(*usize >= isalloc(tsd_tsdn(tsd), p)); - *usize = isalloc(tsd_tsdn(tsd), p); - } - - sample_event = te_prof_sample_event_lookahead(tsd, *usize); - prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, + assert(usize == isalloc(tsd_tsdn(tsd), p)); + prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, old_usize, &old_prof_info, sample_event); return p; @@ -3464,14 +3450,14 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { + goto label_oom; + } hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { - if (aligned_usize_get(size, alignment, &usize, NULL, false)) { - goto label_oom; - } - p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, + p = irallocx_prof(tsd, ptr, old_usize, size, alignment, usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { goto label_oom; @@ -3482,7 +3468,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(p == NULL)) { goto label_oom; } - usize = isalloc(tsd_tsdn(tsd), p); + assert(usize == isalloc(tsd_tsdn(tsd), p)); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); thread_alloc_event(tsd, usize); -- cgit v0.12 From 6c5a3a24dd03e98c8b78178496c2a9756ec1490a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 24 Aug 2020 15:15:27 -0700 Subject: Omit bin stats rows with no data --- src/stats.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/stats.c b/src/stats.c index 999ba9f..d5b94fb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -379,6 +379,10 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti " ---\n"); } + if (in_gap && !emitter_outputs_json(emitter)) { + continue; + } + CTL_LEAF(arenas_bin_mib, 3, "size", ®_size, size_t); CTL_LEAF(arenas_bin_mib, 3, "nregs", &nregs, uint32_t); CTL_LEAF(arenas_bin_mib, 3, "slab_size", &slab_size, size_t); -- cgit v0.12 From 22d62d8cbd873fd3b2acb4bfccf6a06cd2e0d2e7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 18 Dec 2020 11:06:22 -0800 Subject: Handle ending gap properly for HPA stats --- src/stats.c | 
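[Editor's aside, not part of the patch series: one practical consequence of the realloc sizing-stability change above is that the usable size of a rallocx() result becomes a pure function of the request (size, flags), so it can be predicted up front with nallocx(). A sketch of that guarantee; the wrapper name and the 64-byte alignment are illustrative.]

#include <assert.h>
#include <jemalloc/jemalloc.h>

static void *
grow_to(void *p, size_t size) {
	int flags = MALLOCX_ALIGN(64);
	size_t predicted = nallocx(size, flags);

	p = rallocx(p, size, flags);
	if (p != NULL) {
		/* Holds whether or not the allocation moved. */
		assert(sallocx(p, flags) == predicted);
	}
	return p;
}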
3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats.c b/src/stats.c index d5b94fb..7c2707e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -806,6 +806,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ emitter_json_object_end(emitter); /* End "hpa_shard" */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); + } } static void -- cgit v0.12 From 8a56d6b6369487a9595dff69c28ccc88073d643e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Dec 2020 14:30:43 -0800 Subject: Add last-N mutex stats --- include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 3759daa..a13e285 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,7 +11,9 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) + OP(prof_dump) \ + OP(prof_recent_alloc) \ + OP(prof_recent_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index 0f1f652..8f6aff3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1232,6 +1232,12 @@ ctl_refresh(tsdn_t *tsdn) { global_prof_mutex_prof_thds_data, tdatas_mtx); READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_recent_alloc, + prof_recent_alloc_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_recent_dump, + prof_recent_dump_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -3344,6 +3350,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(bt2gctx_mtx); MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); + MUTEX_PROF_RESET(prof_recent_alloc_mtx); + MUTEX_PROF_RESET(prof_recent_dump_mtx); } /* Per arena mutexes. */ -- cgit v0.12 From b35ac00d58529b266598322de2529414c91909cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 25 Aug 2020 14:30:37 -0700 Subject: Do not bump to large size for page aligned request --- include/jemalloc/internal/sz.h | 2 +- src/arena.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index b094116..91940cc 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -288,7 +288,7 @@ sz_sa2u(size_t size, size_t alignment) { assert(alignment != 0 && ((alignment - 1) & alignment) == 0); /* Try for a small size class. */ - if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) { + if (size <= SC_SMALL_MAXCLASS && alignment <= PAGE) { /* * Round size up to the nearest multiple of alignment. * diff --git a/src/arena.c b/src/arena.c index 209eb34..6a062de 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1049,10 +1049,17 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= SC_SMALL_MAXCLASS - && (alignment < PAGE - || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { + if (usize <= SC_SMALL_MAXCLASS) { /* Small; alignment doesn't require special slab placement. */ + + /* usize should be a result of sz_sa2u() */ + assert((usize & (alignment - 1)) == 0); + + /* + * Small usize can't come from an alignment larger than a page. 
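[Editor's aside, not part of the patch series: to make the effect of the sz_sa2u()/arena_palloc() change above concrete, here is a sketch assuming the common 4 KiB page size and default size classes. A page-aligned request that fits a small size class is now served from the small path instead of being bumped to the smallest large class, as the commit title describes; the function name is illustrative.]

#include <stdio.h>
#include <jemalloc/jemalloc.h>

static void
page_aligned_small(void) {
	/* 8 KiB request, 4 KiB alignment: stays in the 8 KiB small bin. */
	void *p = mallocx(8192, MALLOCX_ALIGN(4096));
	if (p != NULL) {
		/* Before this change, the smallest large class was used. */
		printf("usable size: %zu\n", sallocx(p, MALLOCX_ALIGN(4096)));
		dallocx(p, 0);
	}
}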
+ */ + assert(alignment <= PAGE); + ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); } else { -- cgit v0.12 From 526180b76d9e54f40d0fb9e58b0647a21a7e5f77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 17:14:30 -0800 Subject: Extent.c: Avoid an rtree NULL-check. The edge case in which pages_map returns (void *)PAGE can trigger an incorrect assertion failure. Avoid it. --- src/extent.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/extent.c b/src/extent.c index 378bc73..c41f17c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -893,8 +893,22 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } /* Try to coalesce backward. */ - edata_t *prev = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_before_get(edata), inactive_only); + edata_t *prev = NULL; + if (edata_before_get(edata) != NULL) { + /* + * This is subtle; the rtree code asserts that its input + * pointer is non-NULL, and this is a useful thing to + * check. But it's possible that edata corresponds to + * an address of (void *)PAGE (in practice, this has + * only been observed on FreeBSD when address-space + * randomization is on, but it could in principle happen + * anywhere). In this case, edata_before_get(edata) is + * NULL, triggering the assert. + */ + prev = emap_lock_edata_from_addr(tsdn, pac->emap, + edata_before_get(edata), inactive_only); + + } if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); -- cgit v0.12 From 83cad746aeb7ed68bedec501b4cb6c0eff438c11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 11:18:21 -0800 Subject: prof_log: cassert(config_prof) in public functions This lets the compiler infer that the code is dead in builds where profiling is enabled, saving on space there. --- src/prof_log.c | 11 +++++++++++ test/unit/prof_log.c | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 3a653fb..4465821 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -202,6 +202,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + cassert(config_prof); prof_tctx_t *tctx = prof_info->alloc_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); @@ -307,6 +308,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { /* Used in unit tests. */ size_t prof_log_bt_count(void) { + cassert(config_prof); size_t cnt = 0; prof_bt_node_t *node = log_bt_first; while (node != NULL) { @@ -319,6 +321,7 @@ prof_log_bt_count(void) { /* Used in unit tests. */ size_t prof_log_alloc_count(void) { + cassert(config_prof); size_t cnt = 0; prof_alloc_node_t *node = log_alloc_first; while (node != NULL) { @@ -331,6 +334,7 @@ prof_log_alloc_count(void) { /* Used in unit tests. */ size_t prof_log_thr_count(void) { + cassert(config_prof); size_t cnt = 0; prof_thr_node_t *node = log_thr_first; while (node != NULL) { @@ -343,12 +347,14 @@ prof_log_thr_count(void) { /* Used in unit tests. */ bool prof_log_is_logging(void) { + cassert(config_prof); return prof_logging_state == prof_logging_state_started; } /* Used in unit tests. */ bool prof_log_rep_check(void) { + cassert(config_prof); if (prof_logging_state == prof_logging_state_stopped && log_tables_initialized) { return true; @@ -401,11 +407,14 @@ prof_log_rep_check(void) { /* Used in unit tests. 
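[Editor's aside, not part of the patch series: the mechanism this commit relies on, reduced to a standalone sketch. jemalloc's real cassert() expands to not_reached() rather than abort(), and config_prof is a compile-time constant chosen by configure; the names below are stand-ins.]

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

static const bool config_prof = false;	/* a non-profiling build */

#define cassert(c) do {							\
	if (!(c)) {							\
		abort();	/* not_reached() in jemalloc proper */	\
	}								\
} while (0)

size_t
prof_log_bt_count_sketch(void) {
	cassert(config_prof);
	/*
	 * config_prof is a compile-time constant false here, so everything
	 * past the cassert is provably unreachable; the compiler can strip
	 * this body from non-profiling builds, which is the space saving
	 * the commit message refers to.
	 */
	return 0;
}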
*/ void prof_log_dummy_set(bool new_value) { + cassert(config_prof); prof_log_dummy = new_value; } bool prof_log_start(tsdn_t *tsdn, const char *filename) { + cassert(config_prof); + if (!opt_prof) { return true; } @@ -586,6 +595,7 @@ prof_log_emit_metadata(emitter_t *emitter) { #define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE bool prof_log_stop(tsdn_t *tsdn) { + cassert(config_prof); if (!opt_prof || !prof_booted) { return true; } @@ -672,6 +682,7 @@ prof_log_stop(tsdn_t *tsdn) { #undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { + cassert(config_prof); if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { return true; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 6b2336d..5ff208e 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -141,7 +141,9 @@ TEST_END int main(void) { - prof_log_dummy_set(true); + if (config_prof) { + prof_log_dummy_set(true); + } return test_no_reentrancy( test_prof_log_many_logs, test_prof_log_many_traces, -- cgit v0.12 From 5d8e70ab26baf712a8741f9ba2acb646fba4de45 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 11:25:13 -0800 Subject: prof_recent: cassert(config_prof) more often. This tells the compiler that these functions are never called, which lets them be optimized away in builds where profiling is disabled. --- src/prof_recent.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/prof_recent.c b/src/prof_recent.c index ff87678..af75860 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -63,6 +63,7 @@ increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) { + cassert(config_prof); assert(opt_prof && prof_booted); malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -106,6 +107,7 @@ prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { edata_t * prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) { + cassert(config_prof); return prof_recent_alloc_edata_get_no_lock(n); } @@ -123,16 +125,19 @@ prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) { void edata_prof_recent_alloc_init(edata_t *edata) { + cassert(config_prof); edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } static inline prof_recent_t * edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + cassert(config_prof); return edata_prof_recent_alloc_get_dont_call_directly(edata); } prof_recent_t * edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { + cassert(config_prof); return edata_prof_recent_alloc_get_no_lock(edata); } @@ -189,6 +194,7 @@ edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata, */ void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { + cassert(config_prof); /* * Check whether the recent allocation record still exists without * trying to acquire the lock. @@ -271,6 +277,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { + cassert(config_prof); assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -397,6 +404,7 @@ label_rollback: ssize_t prof_recent_alloc_max_ctl_read() { + cassert(config_prof); /* Don't bother to acquire the lock. 
*/ return prof_recent_alloc_max_get_no_lock(); } @@ -450,6 +458,7 @@ prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + cassert(config_prof); assert(max >= -1); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); @@ -521,6 +530,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { #define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { + cassert(config_prof); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx); buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, @@ -570,6 +580,7 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { bool prof_recent_init() { + cassert(config_prof); prof_recent_alloc_max_init(); if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", -- cgit v0.12 From a9fa2defdbe98b849151688cb70e24ba55dc8587 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 12:04:07 -0800 Subject: Add JEMALLOC_COLD, and mark some functions cold. This hints to the compiler that it should care more about space than CPU (among other things). In cases where the compiler lacks profile-guided information, this can be a substantial space savings. For now, we mark the mallctl or atexit driven profiling and stats functions that take up the most space. --- configure.ac | 12 ++++++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ include/jemalloc/jemalloc_macros.h.in | 7 +++++++ src/malloc_io.c | 1 + src/prof_log.c | 7 ++++++- src/prof_recent.c | 1 + src/stats.c | 6 ++++++ 7 files changed, 36 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index eba3e78..53ac7cc 100644 --- a/configure.ac +++ b/configure.ac @@ -914,6 +914,18 @@ if test "x${je_cv_fallthrough}" = "xyes" ; then JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) fi +dnl Check for cold attribute support. +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) +JE_COMPILABLE([cold attribute], [], + [__attribute__((__cold__)) void foo();], + [je_cv_cold]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_cold}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 032fba4..cbe2fca 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -16,6 +16,9 @@ /* Defined if fallthrough attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_FALLTHROUGH +/* Defined if cold attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_COLD + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. 
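[Editor's aside, not part of the patch series: for readers unfamiliar with the attribute the new configure check probes for, __attribute__((__cold__)) tells GCC/Clang to optimize a function for size and keep it out of the hot text. A standalone sketch independent of the JEMALLOC_COLD macro added here; the macro and function names are illustrative.]

#include <stdio.h>
#include <stdlib.h>

#if defined(__GNUC__) || defined(__clang__)
#  define EXAMPLE_COLD __attribute__((__cold__))
#else
#  define EXAMPLE_COLD
#endif

/* Rarely executed: the compiler favors size over speed for this body and
 * places it away from hot code, improving callers' i-cache behavior. */
EXAMPLE_COLD static void
die(const char *msg) {
	fprintf(stderr, "fatal: %s\n", msg);
	abort();
}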
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 1ceb7b1..5bb5c75 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -85,6 +85,7 @@ # else # define JEMALLOC_ALLOCATOR # endif +# define JEMALLOC_COLD #elif defined(JEMALLOC_HAVE_ATTR) # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) @@ -120,6 +121,11 @@ # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR +# ifdef JEMALLOC_HAVE_ATTR_COLD +# define JEMALLOC_COLD JEMALLOC_ATTR(__cold__) +# else +# define JEMALLOC_COLD +# endif #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) @@ -133,6 +139,7 @@ # define JEMALLOC_SECTION(s) # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR +# define JEMALLOC_COLD #endif #if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) diff --git a/src/malloc_io.c b/src/malloc_io.c index 59a0cbf..b76885c 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -321,6 +321,7 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { return s; } +JEMALLOC_COLD size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { size_t i; diff --git a/src/prof_log.c b/src/prof_log.c index 4465821..356a886 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -200,6 +200,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { } } +JEMALLOC_COLD void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { cassert(config_prof); @@ -411,6 +412,7 @@ prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } +JEMALLOC_COLD bool prof_log_start(tsdn_t *tsdn, const char *filename) { cassert(config_prof); @@ -593,6 +595,7 @@ prof_log_emit_metadata(emitter_t *emitter) { } #define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE +JEMALLOC_COLD bool prof_log_stop(tsdn_t *tsdn) { cassert(config_prof); @@ -681,7 +684,9 @@ prof_log_stop(tsdn_t *tsdn) { } #undef PROF_LOG_STOP_BUFSIZE -bool prof_log_init(tsd_t *tsd) { +JEMALLOC_COLD +bool +prof_log_init(tsd_t *tsd) { cassert(config_prof); if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { diff --git a/src/prof_recent.c b/src/prof_recent.c index af75860..834a944 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -528,6 +528,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { } #define PROF_RECENT_PRINT_BUFSIZE 65536 +JEMALLOC_COLD void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { cassert(config_prof); diff --git a/src/stats.c b/src/stats.c index 7c2707e..dac0683 100644 --- a/src/stats.c +++ b/src/stats.c @@ -281,6 +281,7 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, header_##column_name.str_val = human ? 
human : #column_name; +JEMALLOC_COLD static void stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { size_t page; @@ -488,6 +489,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti } } +JEMALLOC_COLD static void stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; @@ -576,6 +578,7 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } } +JEMALLOC_COLD static void stats_arena_extents_print(emitter_t *emitter, unsigned i) { unsigned j; @@ -841,6 +844,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim emitter_json_object_end(emitter); /* End "mutexes". */ } +JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { @@ -1168,6 +1172,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, } } +JEMALLOC_COLD static void stats_general_print(emitter_t *emitter) { const char *cpv; @@ -1445,6 +1450,7 @@ stats_general_print(emitter_t *emitter) { emitter_json_object_end(emitter); /* Close "arenas" */ } +JEMALLOC_COLD static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex, bool extents, bool hpa) { -- cgit v0.12 From f9bb8dedef92fc00225c52546acfb58bd8e74217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 12:16:38 -0800 Subject: Un-force-inline do_rallocx. The additional overhead of the function-call setup and flags checking is relatively small, but costs us the replication of the entire realloc pathway in terms of size. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8384cfc..b0a3b76 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3418,7 +3418,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return p; } -JEMALLOC_ALWAYS_INLINE void * +static void * do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { void *p; tsd_t *tsd; -- cgit v0.12 From afa489c3c5fd16bd31b2756c081c92e08937e6b7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 11:31:53 -0700 Subject: Record request size in prof info --- include/jemalloc/internal/arena_inlines_b.h | 5 +++-- include/jemalloc/internal/edata.h | 12 ++++++++++++ include/jemalloc/internal/large_externs.h | 2 +- include/jemalloc/internal/prof_inlines.h | 4 ++-- include/jemalloc/internal/prof_structs.h | 2 ++ src/large.c | 4 +++- src/prof.c | 2 +- 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7971b4c..aaef45c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -105,11 +105,12 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { +arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, + size_t size) { cassert(config_prof); assert(!edata_slab_get(edata)); - large_prof_info_set(edata, tctx); + large_prof_info_set(edata, tctx, size); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index c048288..11358ea 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -40,6 +40,8 @@ typedef enum 
extent_pai_e extent_pai_t; struct e_prof_info_s { /* Time when this was allocated. */ nstime_t e_prof_alloc_time; + /* Allocation request size. */ + size_t e_prof_alloc_size; /* Points to a prof_tctx_t. */ atomic_p_t e_prof_tctx; /* @@ -390,6 +392,11 @@ edata_prof_alloc_time_get(const edata_t *edata) { return &edata->e_prof_info.e_prof_alloc_time; } +static inline size_t +edata_prof_alloc_size_get(const edata_t *edata) { + return edata->e_prof_info.e_prof_alloc_size; +} + static inline prof_recent_t * edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { return (prof_recent_t *)atomic_load_p( @@ -527,6 +534,11 @@ edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { } static inline void +edata_prof_alloc_size_set(edata_t *edata, size_t size) { + edata->e_prof_info.e_prof_alloc_size = size; +} + +static inline void edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, prof_recent_t *recent_alloc) { atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc, diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 2797964..8e09122 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -19,6 +19,6 @@ size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); -void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); +void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 62c5683..c76d2ae 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -98,12 +98,12 @@ prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { +prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(edata != NULL); assert((uintptr_t)tctx > (uintptr_t)1U); - arena_prof_info_set(tsd, edata, tctx); + arena_prof_info_set(tsd, edata, tctx, size); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 73ac3d5..c2a111a 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -103,6 +103,8 @@ struct prof_info_s { nstime_t alloc_time; /* Points to the prof_tctx_t corresponding to the allocation. */ prof_tctx_t *alloc_tctx; + /* Allocation request size. 
*/ + size_t alloc_size; }; struct prof_gctx_s { diff --git a/src/large.c b/src/large.c index 42d2fd7..f23839f 100644 --- a/src/large.c +++ b/src/large.c @@ -281,6 +281,7 @@ large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); + prof_info->alloc_size = edata_prof_alloc_size_get(edata); if (reset_recent) { /* * Reset the pointer on the recent allocation record, @@ -302,10 +303,11 @@ large_prof_tctx_reset(edata_t *edata) { } void -large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { +large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size) { nstime_t t; nstime_prof_init_update(&t); edata_prof_alloc_time_set(edata, &t); + edata_prof_alloc_size_set(edata, size); edata_prof_recent_alloc_init(edata); large_prof_tctx_set(edata, tctx); } diff --git a/src/prof.c b/src/prof.c index 9b651db..258b5f2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -97,7 +97,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - prof_info_set(tsd, edata, tctx); + prof_info_set(tsd, edata, tctx, size); szind_t szind = sz_size2index(usize); -- cgit v0.12 From 40fa4d29d3e938765d0b608f92701410ce90b887 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 18 Dec 2020 17:14:59 -0800 Subject: Track per size class internal fragmentation --- Makefile.in | 2 + include/jemalloc/internal/prof_externs.h | 3 + include/jemalloc/internal/prof_stats.h | 17 +++++ include/jemalloc/internal/witness.h | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 3 + src/jemalloc.c | 1 + src/prof.c | 31 +++++++-- src/prof_stats.c | 57 +++++++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_stats.c | 80 ++++++++++++++++++++++ test/unit/prof_stats.sh | 5 ++ 15 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 include/jemalloc/internal/prof_stats.h create mode 100644 src/prof_stats.c create mode 100644 test/unit/prof_stats.c create mode 100644 test/unit/prof_stats.sh diff --git a/Makefile.in b/Makefile.in index ba6dd76..3cb3161 100644 --- a/Makefile.in +++ b/Makefile.in @@ -139,6 +139,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ @@ -248,6 +249,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index b94fbed..671ac9b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -27,6 +27,9 @@ extern ssize_t opt_prof_recent_alloc_max; /* Whether to use thread name provided by the system or by mallctl. */ extern bool opt_prof_sys_thread_name; +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + /* Accessed via prof_active_[gs]et{_unlocked,}(). 
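[Editor's aside, not part of the patch series: what recording the request size enables at the single-allocation level. A sketch only; prof_info_t and alloc_size are the fields added above, usize denotes the size-class-rounded size the allocator actually returns, and the helper name is illustrative.]

/* Internal fragmentation of one sampled allocation: the bytes by which the
 * chosen size class exceeds what the caller asked for. */
static inline size_t
sampled_waste_bytes(size_t usize, const prof_info_t *prof_info) {
	return usize - prof_info->alloc_size;
}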
*/ extern bool prof_active; diff --git a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h new file mode 100644 index 0000000..7954e82 --- /dev/null +++ b/include/jemalloc/internal/prof_stats.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STATS_H +#define JEMALLOC_INTERNAL_PROF_STATS_H + +typedef struct prof_stats_s prof_stats_t; +struct prof_stats_s { + uint64_t req_sum; + uint64_t count; +}; + +extern malloc_mutex_t prof_stats_mtx; + +void prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats); +void prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats); + +#endif /* JEMALLOC_INTERNAL_PROF_STATS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 662907c..66dcf66 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -73,6 +73,7 @@ enum witness_rank_e { WITNESS_RANK_PROF_GDUMP = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_NEXT_THR_UID = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF, }; typedef enum witness_rank_e witness_rank_t; diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 531dd9a..9443ac5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f031fb1..3c4bff6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index bc64de5..fafb491 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f031fb1..3c4bff6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index 8f6aff3..598759c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -136,6 +136,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) +CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) @@ -415,6 +416,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} @@ -2057,6 +2059,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, 
ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, diff --git a/src/jemalloc.c b/src/jemalloc.c index b0a3b76..0271415 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1552,6 +1552,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") CONF_HANDLE_BOOL(opt_prof_sys_thread_name, "prof_sys_thread_name") if (CONF_MATCH("prof_time_resolution")) { diff --git a/src/prof.c b/src/prof.c index 258b5f2..0f1f7a7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/thread_event.h" @@ -131,6 +132,10 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, assert(tctx == edata_prof_tctx_get(edata)); prof_recent_alloc(tsd, edata, size, usize); } + + if (opt_prof_stats) { + prof_stats_inc(tsd, szind, size); + } } void @@ -160,6 +165,10 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_try_log(tsd, usize, prof_info); prof_tctx_try_destroy(tsd, tctx); + + if (opt_prof_stats) { + prof_stats_dec(tsd, szind, prof_info->alloc_size); + } } prof_tctx_t * @@ -587,7 +596,13 @@ prof_boot2(tsd_t *tsd, base_t *base) { next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_NEXT_THR_UID, + malloc_mutex_rank_exclusive)) { + return true; + } + + if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { return true; } @@ -595,8 +610,9 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", - WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, + "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, + malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", @@ -681,9 +697,10 @@ prof_prefork1(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); + malloc_mutex_prefork(tsdn, &prof_stats_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); - malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); } } @@ -692,10 +709,11 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); @@ -719,9 +737,10 @@ prof_postfork_child(tsdn_t *tsdn) { if (config_prof && 
opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); diff --git a/src/prof_stats.c b/src/prof_stats.c new file mode 100644 index 0000000..5d1a506 --- /dev/null +++ b/src/prof_stats.c @@ -0,0 +1,57 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/prof_stats.h" + +bool opt_prof_stats = false; +malloc_mutex_t prof_stats_mtx; +static prof_stats_t prof_stats_live[PROF_SC_NSIZES]; +static prof_stats_t prof_stats_accum[PROF_SC_NSIZES]; + +static void +prof_stats_enter(tsd_t *tsd, szind_t ind) { + assert(opt_prof && opt_prof_stats); + assert(ind < SC_NSIZES); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +static void +prof_stats_leave(tsd_t *tsd) { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +void +prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum += size; + prof_stats_live[ind].count++; + prof_stats_accum[ind].req_sum += size; + prof_stats_accum[ind].count++; + prof_stats_leave(tsd); +} + +void +prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum -= size; + prof_stats_live[ind].count--; + prof_stats_leave(tsd); +} + +void +prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_live[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} + +void +prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_accum[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3d5b278..85dcb4e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -317,6 +317,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); + TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); #undef TEST_MALLCTL_OPT diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c new file mode 100644 index 0000000..555b69e --- /dev/null +++ b/test/unit/prof_stats.c @@ -0,0 +1,80 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/prof_stats.h" + +static void +test_wrapper(szind_t ind) { +#define N_PTRS 3 + assert(opt_prof && opt_prof_stats); + + tsd_t *tsd = tsd_fetch(); + + prof_stats_t live_stats_orig; + prof_stats_get_live(tsd, ind, &live_stats_orig); + prof_stats_t accum_stats_orig; + prof_stats_get_accum(tsd, ind, &accum_stats_orig); + + void *ptrs[N_PTRS]; + + uint64_t live_req_sum = 0; + uint64_t live_count = 0; + uint64_t accum_req_sum = 0; + uint64_t accum_count = 0; + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + void *p = malloc(sz); + assert_ptr_not_null(p, "malloc() failed"); + ptrs[i] = p; + live_req_sum += sz; + live_count++; + accum_req_sum += 
sz; + accum_count++; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + free(ptrs[i]); + live_req_sum -= sz; + live_count--; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } +#undef N_PTRS +} + +TEST_BEGIN(test_prof_stats) { + test_skip_if(!config_prof); + test_wrapper(0); + test_wrapper(1); + test_wrapper(2); +} +TEST_END + +int +main(void) { + return test( + test_prof_stats); +} diff --git a/test/unit/prof_stats.sh b/test/unit/prof_stats.sh new file mode 100644 index 0000000..b01dfd4 --- /dev/null +++ b/test/unit/prof_stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_stats:true" +fi -- cgit v0.12 From 54f3351f1f699a2d50f42da7f9a73a8d1a25ea30 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 14:37:34 -0700 Subject: Add mallctl for prof stats fetching --- src/ctl.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++- test/unit/prof_stats.c | 69 +++++++++++++------- 2 files changed, 215 insertions(+), 24 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 598759c..a4f1916 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -12,6 +12,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -183,6 +184,12 @@ CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) CTL_PROTO(prof_log_start) CTL_PROTO(prof_log_stop) +CTL_PROTO(prof_stats_bins_i_live) +CTL_PROTO(prof_stats_bins_i_accum) +INDEX_PROTO(prof_stats_bins_i) +CTL_PROTO(prof_stats_lextents_i_live) +CTL_PROTO(prof_stats_lextents_i_accum) +INDEX_PROTO(prof_stats_lextents_i) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -494,6 +501,37 @@ static const ctl_named_node_t arenas_node[] = { {NAME("lookup"), CTL(arenas_lookup)} }; +static const ctl_named_node_t prof_stats_bins_i_node[] = { + {NAME("live"), CTL(prof_stats_bins_i_live)}, + {NAME("accum"), CTL(prof_stats_bins_i_accum)} +}; + +static const ctl_named_node_t super_prof_stats_bins_i_node[] = { + {NAME(""), CHILD(named, prof_stats_bins_i)} +}; + +static const ctl_indexed_node_t prof_stats_bins_node[] = { + {INDEX(prof_stats_bins_i)} +}; + +static const ctl_named_node_t prof_stats_lextents_i_node[] = { + {NAME("live"), CTL(prof_stats_lextents_i_live)}, + {NAME("accum"), CTL(prof_stats_lextents_i_accum)} +}; + +static const ctl_named_node_t super_prof_stats_lextents_i_node[] = 
{ + {NAME(""), CHILD(named, prof_stats_lextents_i)} +}; + +static const ctl_indexed_node_t prof_stats_lextents_node[] = { + {INDEX(prof_stats_lextents_i)} +}; + +static const ctl_named_node_t prof_stats_node[] = { + {NAME("bins"), CHILD(indexed, prof_stats_bins)}, + {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, +}; + static const ctl_named_node_t prof_node[] = { {NAME("thread_active_init"), CTL(prof_thread_active_init)}, {NAME("active"), CTL(prof_active)}, @@ -504,8 +542,10 @@ static const ctl_named_node_t prof_node[] = { {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, {NAME("log_start"), CTL(prof_log_start)}, - {NAME("log_stop"), CTL(prof_log_stop)} + {NAME("log_stop"), CTL(prof_log_stop)}, + {NAME("stats"), CHILD(named, prof_stats)} }; + static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -3975,3 +4015,131 @@ experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +static int +prof_stats_bins_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned binind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(binind, 3); + if (binind >= SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_live(tsd, (szind_t)binind, &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static int +prof_stats_bins_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned binind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(binind, 3); + if (binind >= SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_accum(tsd, (szind_t)binind, &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static const ctl_named_node_t * +prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { + if (!(config_prof && opt_prof && opt_prof_stats)) { + return NULL; + } + if (i >= SC_NBINS) { + return NULL; + } + return super_prof_stats_bins_i_node; +} + +static int +prof_stats_lextents_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned lextent_ind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(lextent_ind, 3); + if (lextent_ind >= SC_NSIZES - SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_live(tsd, (szind_t)(lextent_ind + SC_NBINS), &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static int +prof_stats_lextents_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned lextent_ind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(lextent_ind, 3); + if (lextent_ind >= SC_NSIZES - SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_accum(tsd, (szind_t)(lextent_ind + SC_NBINS), &stats); + READ(stats, 
prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static const ctl_named_node_t * +prof_stats_lextents_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { + if (!(config_prof && opt_prof && opt_prof_stats)) { + return NULL; + } + if (i >= SC_NSIZES - SC_NBINS) { + return NULL; + } + return super_prof_stats_lextents_i_node; +} diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index 555b69e..123d899 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -1,18 +1,33 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/prof_stats.h" - static void test_wrapper(szind_t ind) { #define N_PTRS 3 +#define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); - tsd_t *tsd = tsd_fetch(); + char mallctl_live_str[MALLCTL_STR_LEN]; + char mallctl_accum_str[MALLCTL_STR_LEN]; + if (ind < SC_NBINS) { + malloc_snprintf(mallctl_live_str, MALLCTL_STR_LEN, + "prof.stats.bins.%u.live", (unsigned)ind); + malloc_snprintf(mallctl_accum_str, MALLCTL_STR_LEN, + "prof.stats.bins.%u.accum", (unsigned)ind); + } else { + malloc_snprintf(mallctl_live_str, MALLCTL_STR_LEN, + "prof.stats.lextents.%u.live", (unsigned)(ind - SC_NBINS)); + malloc_snprintf(mallctl_accum_str, MALLCTL_STR_LEN, + "prof.stats.lextents.%u.accum", (unsigned)(ind - SC_NBINS)); + } + + size_t stats_len = 2 * sizeof(uint64_t); - prof_stats_t live_stats_orig; - prof_stats_get_live(tsd, ind, &live_stats_orig); - prof_stats_t accum_stats_orig; - prof_stats_get_accum(tsd, ind, &accum_stats_orig); + uint64_t live_stats_orig[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats_orig, &stats_len, + NULL, 0), 0, ""); + uint64_t accum_stats_orig[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, + NULL, 0), 0, ""); void *ptrs[N_PTRS]; @@ -30,17 +45,19 @@ test_wrapper(szind_t ind) { live_count++; accum_req_sum += sz; accum_count++; - prof_stats_t live_stats; - prof_stats_get_live(tsd, ind, &live_stats); - expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + uint64_t live_stats[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(live_stats[0] - live_stats_orig[0], live_req_sum, ""); - expect_u64_eq(live_stats.count - live_stats_orig.count, + expect_u64_eq(live_stats[1] - live_stats_orig[1], live_count, ""); - prof_stats_t accum_stats; - prof_stats_get_accum(tsd, ind, &accum_stats); - expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + uint64_t accum_stats[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); - expect_u64_eq(accum_stats.count - accum_stats_orig.count, + expect_u64_eq(accum_stats[1] - accum_stats_orig[1], accum_count, ""); } @@ -49,19 +66,22 @@ test_wrapper(szind_t ind) { free(ptrs[i]); live_req_sum -= sz; live_count--; - prof_stats_t live_stats; - prof_stats_get_live(tsd, ind, &live_stats); - expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + uint64_t live_stats[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(live_stats[0] - live_stats_orig[0], live_req_sum, ""); - expect_u64_eq(live_stats.count - live_stats_orig.count, + expect_u64_eq(live_stats[1] - live_stats_orig[1], live_count, ""); - prof_stats_t accum_stats; - prof_stats_get_accum(tsd, ind, &accum_stats); - expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + uint64_t accum_stats[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, 
&stats_len, + NULL, 0), 0, ""); + expect_u64_eq(accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); - expect_u64_eq(accum_stats.count - accum_stats_orig.count, + expect_u64_eq(accum_stats[1] - accum_stats_orig[1], accum_count, ""); } +#undef MALLCTL_STR_LEN #undef N_PTRS } @@ -70,6 +90,9 @@ TEST_BEGIN(test_prof_stats) { test_wrapper(0); test_wrapper(1); test_wrapper(2); + test_wrapper(SC_NBINS); + test_wrapper(SC_NBINS + 1); + test_wrapper(SC_NBINS + 2); } TEST_END -- cgit v0.12 From 4352cbc21c597d5147c352740fdeefdcc4af0f11 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 26 Aug 2020 16:48:59 -0700 Subject: Add alignment tests for prof stats --- test/unit/prof_stats.c | 80 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index 123d899..a914587 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -1,8 +1,10 @@ #include "test/jemalloc_test.h" -static void -test_wrapper(szind_t ind) { #define N_PTRS 3 + +static void +test_combinations(szind_t ind, size_t sizes_array[N_PTRS], + int flags_array[N_PTRS]) { #define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); @@ -36,10 +38,12 @@ test_wrapper(szind_t ind) { uint64_t accum_req_sum = 0; uint64_t accum_count = 0; - for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { - void *p = malloc(sz); + for (size_t i = 0; i < N_PTRS; ++i) { + size_t sz = sizes_array[i]; + int flags = flags_array[i]; + void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); + assert(malloc_usable_size(p) == sz_index2size(ind)); ptrs[i] = p; live_req_sum += sz; live_count++; @@ -61,9 +65,10 @@ test_wrapper(szind_t ind) { accum_count, ""); } - for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { - free(ptrs[i]); + for (size_t i = 0; i < N_PTRS; ++i) { + size_t sz = sizes_array[i]; + int flags = flags_array[i]; + sdallocx(ptrs[i], sz, flags); live_req_sum -= sz; live_count--; uint64_t live_stats[2]; @@ -82,22 +87,65 @@ test_wrapper(szind_t ind) { accum_count, ""); } #undef MALLCTL_STR_LEN -#undef N_PTRS +} + +static void +test_szind_wrapper(szind_t ind) { + size_t sizes_array[N_PTRS]; + int flags_array[N_PTRS]; + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + sizes_array[i] = sz; + flags_array[i] = 0; + } + test_combinations(ind, sizes_array, flags_array); } TEST_BEGIN(test_prof_stats) { test_skip_if(!config_prof); - test_wrapper(0); - test_wrapper(1); - test_wrapper(2); - test_wrapper(SC_NBINS); - test_wrapper(SC_NBINS + 1); - test_wrapper(SC_NBINS + 2); + test_szind_wrapper(0); + test_szind_wrapper(1); + test_szind_wrapper(2); + test_szind_wrapper(SC_NBINS); + test_szind_wrapper(SC_NBINS + 1); + test_szind_wrapper(SC_NBINS + 2); +} +TEST_END + +static void +test_szind_aligned_wrapper(szind_t ind, unsigned lg_align) { + size_t sizes_array[N_PTRS]; + int flags_array[N_PTRS]; + int flags = MALLOCX_LG_ALIGN(lg_align); + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + sizes_array[i] = sz; + flags_array[i] = flags; + } + test_combinations( + sz_size2index(sz_sa2u(sz_index2size(ind), 1 << lg_align)), + sizes_array, flags_array); +} + +TEST_BEGIN(test_prof_stats_aligned) { + test_skip_if(!config_prof); + for (szind_t ind = 0; ind < 10; ++ind) { + for (unsigned lg_align = 0; lg_align < 10; ++lg_align) { + test_szind_aligned_wrapper(ind, lg_align); + } + } + for (szind_t ind = SC_NBINS - 5; ind < SC_NBINS 
+ 5; ++ind) { + for (unsigned lg_align = SC_LG_LARGE_MINCLASS - 5; + lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { + test_szind_aligned_wrapper(ind, lg_align); + } + } } TEST_END int main(void) { return test( - test_prof_stats); + test_prof_stats, + test_prof_stats_aligned); } -- cgit v0.12 From 1f1a0231ed9909119db2d350a2b44e1b21bda60f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 17 Aug 2020 10:40:28 -0700 Subject: Split macros for initializing stats headers --- src/stats.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/stats.c b/src/stats.c index dac0683..393df15 100644 --- a/src/stats.c +++ b/src/stats.c @@ -264,22 +264,37 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #undef EMITTER_TYPE_uint64_t } -#define COL(row_name, column_name, left_or_right, col_width, etype) \ - emitter_col_t col_##column_name; \ - emitter_col_init(&col_##column_name, &row_name); \ - col_##column_name.justify = emitter_justify_##left_or_right; \ - col_##column_name.width = col_width; \ +#define COL_DECLARE(column_name) \ + emitter_col_t col_##column_name; + +#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ col_##column_name.type = emitter_type_##etype; -#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ - COL(row_name, column_name, left_or_right, col_width, etype) \ - emitter_col_t header_##column_name; \ - emitter_col_init(&header_##column_name, &header_##row_name); \ - header_##column_name.justify = emitter_justify_##left_or_right; \ - header_##column_name.width = col_width; \ - header_##column_name.type = emitter_type_title; \ +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + COL_DECLARE(column_name); \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype) + +#define COL_HDR_DECLARE(column_name) \ + COL_DECLARE(column_name); \ + emitter_col_t header_##column_name; + +#define COL_HDR_INIT(row_name, column_name, human, left_or_right, \ + col_width, etype) \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ header_##column_name.str_val = human ? 
human : #column_name; +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, \ + etype) \ + COL_HDR_DECLARE(column_name) \ + COL_HDR_INIT(row_name, column_name, human, left_or_right, \ + col_width, etype) JEMALLOC_COLD static void -- cgit v0.12 From 9f71b5779be6d59d2a603b0270e4c0c896d49d1c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 16:47:40 -0700 Subject: Output prof stats in stats print --- src/stats.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 393df15..86a2c01 100644 --- a/src/stats.c +++ b/src/stats.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/prof_stats.h" const char *global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, @@ -298,7 +299,8 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, JEMALLOC_COLD static void -stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, + uint64_t uptime) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -313,6 +315,9 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti emitter_row_t row; emitter_row_init(&row); + bool prof_stats_on = config_prof && opt_prof && opt_prof_stats + && i == MALLCTL_ARENAS_ALL; + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, allocated, NULL, right, 13, uint64) @@ -322,6 +327,16 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, nrequests, NULL, right, 13, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) + COL_HDR_DECLARE(prof_live_requested); + COL_HDR_DECLARE(prof_live_count); + COL_HDR_DECLARE(prof_accum_requested); + COL_HDR_DECLARE(prof_accum_count); + if (prof_stats_on) { + COL_HDR_INIT(row, prof_live_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_live_count, NULL, right, 17, uint64) + COL_HDR_INIT(row, prof_accum_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_accum_count, NULL, right, 17, uint64) + } COL_HDR(row, nshards, NULL, right, 9, unsigned) COL_HDR(row, curregs, NULL, right, 13, size) COL_HDR(row, curslabs, NULL, right, 13, size) @@ -373,6 +388,11 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti size_t arenas_bin_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); + size_t prof_stats_mib[CTL_MAX_DEPTH]; + if (prof_stats_on) { + CTL_LEAF_PREPARE(prof_stats_mib, 0, "prof.stats.bins"); + } + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -381,14 +401,28 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; + prof_stats_t prof_live; + prof_stats_t prof_accum; stats_arenas_mib[4] = j; arenas_bin_mib[2] = j; CTL_LEAF(stats_arenas_mib, 5, "nslabs", &nslabs, uint64_t); + if (prof_stats_on) { + prof_stats_mib[3] = j; + CTL_LEAF(prof_stats_mib, 4, "live", &prof_live, + prof_stats_t); + CTL_LEAF(prof_stats_mib, 4, "accum", &prof_accum, + prof_stats_t); + } + in_gap_prev = in_gap; - in_gap = (nslabs == 0); + if (prof_stats_on) { + in_gap = (nslabs == 0 
&& prof_accum.count == 0); + } else { + in_gap = (nslabs == 0); + } if (in_gap_prev && !in_gap) { emitter_table_printf(emitter, @@ -429,6 +463,16 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti &curregs); emitter_json_kv(emitter, "nrequests", emitter_type_uint64, &nrequests); + if (prof_stats_on) { + emitter_json_kv(emitter, "prof_live_requested", + emitter_type_uint64, &prof_live.req_sum); + emitter_json_kv(emitter, "prof_live_count", + emitter_type_uint64, &prof_live.count); + emitter_json_kv(emitter, "prof_accum_requested", + emitter_type_uint64, &prof_accum.req_sum); + emitter_json_kv(emitter, "prof_accum_count", + emitter_type_uint64, &prof_accum.count); + } emitter_json_kv(emitter, "nfills", emitter_type_uint64, &nfills); emitter_json_kv(emitter, "nflushes", emitter_type_uint64, @@ -475,6 +519,13 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + if (prof_stats_on) { + col_prof_live_requested.uint64_val = prof_live.req_sum; + col_prof_live_count.uint64_val = prof_live.count; + col_prof_accum_requested.uint64_val = + prof_accum.req_sum; + col_prof_accum_count.uint64_val = prof_accum.count; + } col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; @@ -518,6 +569,9 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t row; emitter_row_init(&row); + bool prof_stats_on = config_prof && opt_prof && opt_prof_stats + && i == MALLCTL_ARENAS_ALL; + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, allocated, NULL, right, 13, size) @@ -527,6 +581,16 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, nrequests, NULL, right, 13, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR_DECLARE(prof_live_requested) + COL_HDR_DECLARE(prof_live_count) + COL_HDR_DECLARE(prof_accum_requested) + COL_HDR_DECLARE(prof_accum_count) + if (prof_stats_on) { + COL_HDR_INIT(row, prof_live_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_live_count, NULL, right, 17, uint64) + COL_HDR_INIT(row, prof_accum_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_accum_count, NULL, right, 17, uint64) + } COL_HDR(row, curlextents, NULL, right, 13, size) /* As with bins, we label the large extents table. 
*/ @@ -543,9 +607,16 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t arenas_lextent_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); + size_t prof_stats_mib[CTL_MAX_DEPTH]; + if (prof_stats_on) { + CTL_LEAF_PREPARE(prof_stats_mib, 0, "prof.stats.lextents"); + } + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; + prof_stats_t prof_live; + prof_stats_t prof_accum; stats_arenas_mib[4] = j; arenas_lextent_mib[2] = j; @@ -567,7 +638,25 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, size_t); + if (prof_stats_on) { + prof_stats_mib[3] = j; + CTL_LEAF(prof_stats_mib, 4, "live", &prof_live, + prof_stats_t); + CTL_LEAF(prof_stats_mib, 4, "accum", &prof_accum, + prof_stats_t); + } + emitter_json_object_begin(emitter); + if (prof_stats_on) { + emitter_json_kv(emitter, "prof_live_requested", + emitter_type_uint64, &prof_live.req_sum); + emitter_json_kv(emitter, "prof_live_count", + emitter_type_uint64, &prof_live.count); + emitter_json_kv(emitter, "prof_accum_requested", + emitter_type_uint64, &prof_accum.req_sum); + emitter_json_kv(emitter, "prof_accum_count", + emitter_type_uint64, &prof_accum.count); + } emitter_json_kv(emitter, "curlextents", emitter_type_size, &curlextents); emitter_json_object_end(emitter); @@ -581,6 +670,13 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + if (prof_stats_on) { + col_prof_live_requested.uint64_val = prof_live.req_sum; + col_prof_live_count.uint64_val = prof_live.count; + col_prof_accum_requested.uint64_val = + prof_accum.req_sum; + col_prof_accum_count.uint64_val = prof_accum.count; + } col_curlextents.size_val = curlextents; if (!in_gap) { -- cgit v0.12 From 14d689c0f990f1f946eae5d4706008882d5457a8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Dec 2020 14:47:50 -0800 Subject: Add prof stats mutex stats --- include/jemalloc/internal/mutex_prof.h | 3 ++- src/ctl.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index a13e285..4a526a5 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -13,7 +13,8 @@ OP(prof_thds_data) \ OP(prof_dump) \ OP(prof_recent_alloc) \ - OP(prof_recent_dump) + OP(prof_recent_dump) \ + OP(prof_stats) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index a4f1916..b94ef64 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1280,6 +1280,8 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_recent_dump, prof_recent_dump_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_stats, prof_stats_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -3395,6 +3397,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(prof_dump_mtx); MUTEX_PROF_RESET(prof_recent_alloc_mtx); MUTEX_PROF_RESET(prof_recent_dump_mtx); + MUTEX_PROF_RESET(prof_stats_mtx); } /* Per arena mutexes. 
*/ -- cgit v0.12 From a011c4c22d3fd1da5415dd5001afd195f5cd7ad5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 7 Jan 2021 13:22:08 -0800 Subject: cache_bin: Separate out local and remote accesses. This fixes an incorrect debug-mode assert: - T1 starts an arena stats update and reads stack_head from another thread's cache bin, when that cache bin has 1 item in it. - T2 allocates from that cache bin. The cache_bin's stack_head now points to a NULL pointer, since the cache bin is empty. - T1 Re-reads the cache_bin's stack_head to perform an assertion check (since it previously saw that the bin was empty, whatever stack_head points to should be non-NULL). --- include/jemalloc/internal/cache_bin.h | 54 +++++++++++++++++++++++++++-------- src/arena.c | 4 +-- src/cache_bin.c | 2 +- src/tcache.c | 6 ++-- test/unit/cache_bin.c | 40 ++++++++++++++------------ 5 files changed, 70 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c1b8fc4..cf5ed3e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -167,20 +167,50 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } -/* Number of items currently cached in the bin, without checking ncached_max. */ +/* + * Number of items currently cached in the bin, without checking ncached_max. + * We require specifying whether or not the request is racy or not (i.e. whether + * or not concurrent modifications are possible). + */ static inline cache_bin_sz_t -cache_bin_ncached_get_internal(cache_bin_t *bin) { +cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); - assert(n == 0 || *(bin->stack_head) != NULL); + /* + * We have undefined behavior here; if this function is called from the + * arena stats updating code, then stack_head could change from the + * first line to the next one. Morally, these loads should be atomic, + * but compilers won't currently generate comparisons with in-memory + * operands against atomics, and these variables get accessed on the + * fast paths. This should still be "safe" in the sense of generating + * the correct assembly for the foreseeable future, though. + */ + assert(n == 0 || *(bin->stack_head) != NULL || racy); return n; } -/* Number of items currently cached in the bin, with checking ncached_max. */ +/* + * Number of items currently cached in the bin, with checking ncached_max. The + * caller must know that no concurrent modification of the cache_bin is + * possible. + */ +static inline cache_bin_sz_t +cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ false); + assert(n <= cache_bin_info_ncached_max(info)); + return n; +} + +/* + * Obtain a racy view of the number of items currently in the cache bin, in the + * presence of possible concurrent modifications. 
+ */ static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); +cache_bin_ncached_get_remote(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ true); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -208,7 +238,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { */ static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { - assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == bin->stack_head); } @@ -228,7 +258,7 @@ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); assert(low_water <= cache_bin_info_ncached_max(info)); - assert(low_water <= cache_bin_ncached_get(bin, info)); + assert(low_water <= cache_bin_ncached_get_local(bin, info)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); @@ -247,7 +277,7 @@ cache_bin_low_water_set(cache_bin_t *bin) { static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { - if (cache_bin_ncached_get_internal(bin) + if (cache_bin_ncached_get_internal(bin, /* racy */ false) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); } @@ -319,7 +349,7 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - size_t n = cache_bin_ncached_get_internal(bin); + size_t n = cache_bin_ncached_get_internal(bin, /* racy */ false); if (n > num) { n = num; } @@ -416,7 +446,7 @@ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - 1; - assert(cache_bin_ncached_get(bin, info) == 0 + assert(cache_bin_ncached_get_local(bin, info) == 0 || *arr->ptr != NULL); } @@ -437,7 +467,7 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { - unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; + unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head = bin->stack_head + nflushed; diff --git a/src/arena.c b/src/arena.c index 6a062de..914e63f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -150,7 +150,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += - cache_bin_ncached_get(cache_bin, + cache_bin_ncached_get_remote(cache_bin, &tcache_bin_info[i]) * sz_index2size(i); } } @@ -767,7 +767,7 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, const unsigned nfill) { - assert(cache_bin_ncached_get(cache_bin, cache_bin_info) == 0); + assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); const bin_info_t *bin_info = &bin_infos[binind]; diff --git a/src/cache_bin.c b/src/cache_bin.c index 5f50606..b747082 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -83,7 +83,7 @@ 
cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; assert(cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); - assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); diff --git a/src/tcache.c b/src/tcache.c index 41a1b82..ef0b87d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -125,7 +125,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[szind]); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); @@ -159,7 +159,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* Like the small GC; flush 3/4 of untouched items. */ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[szind]); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); @@ -289,7 +289,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache_slow->arena; diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index b31d07d..a69cad6 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -6,14 +6,15 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill_succeed) { bool success; void *ptr; - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, info, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get(bin, info) == nfill_succeed, ""); + expect_true(cache_bin_ncached_get_local(bin, info) == nfill_succeed, + ""); cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { @@ -24,7 +25,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(cache_bin_low_water_get(bin, info) == nfill_succeed - i - 1, ""); } - expect_true(cache_bin_ncached_get(bin, info) == 0, ""); + expect_true(cache_bin_ncached_get_local(bin, info) == 0, ""); expect_true(cache_bin_low_water_get(bin, info) == 0, ""); } @@ -32,7 +33,7 @@ static void do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nflush) { bool success; - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); for (cache_bin_sz_t i = 0; i < nfill; i++) { success = cache_bin_dalloc_easy(bin, &ptrs[i]); @@ -46,8 +47,9 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } cache_bin_finish_flush(bin, info, &arr, 
nflush); - expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); - while (cache_bin_ncached_get(bin, info) > 0) { + expect_true(cache_bin_ncached_get_local(bin, info) == nfill - nflush, + ""); + while (cache_bin_ncached_get_local(bin, info) > 0) { cache_bin_alloc(bin, &success); } } @@ -55,14 +57,14 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, static void do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, size_t batch) { - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); for (cache_bin_sz_t i = 0; i < nfill; i++) { arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, info, &arr, nfill); - assert_true(cache_bin_ncached_get(bin, info) == nfill, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == nfill, ""); cache_bin_low_water_set(bin); void **out = malloc((batch + 1) * sizeof(void *)); @@ -73,7 +75,7 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } expect_true(cache_bin_low_water_get(bin, info) == nfill - (cache_bin_sz_t)n, ""); - while (cache_bin_ncached_get(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin, info) > 0) { bool success; cache_bin_alloc(bin, &success); } @@ -104,7 +106,7 @@ TEST_BEGIN(test_cache_bin) { /* Initialize to empty; should then have 0 elements. */ expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); - expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); @@ -122,14 +124,15 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get(&bin, &info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); expect_true(cache_bin_low_water_get(&bin, &info) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -138,7 +141,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -149,7 +152,7 @@ TEST_BEGIN(test_cache_bin) { expect_false(success, ""); expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i, ""); /* This should succeed, though. 
*/ @@ -159,11 +162,11 @@ TEST_BEGIN(test_cache_bin) { "Alloc should pop in stack order"); expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -179,7 +182,8 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* * Size is bigger than low water -- the reduced version should @@ -195,7 +199,7 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get(&bin, &info)) { + while (cache_bin_ncached_get_local(&bin, &info)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } -- cgit v0.12 From 2e3104ba07da1df4c04586231ff9266a1e35094d Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 10 Jan 2021 15:48:13 +0100 Subject: Update config.{sub,guess} to support support-aarch64-apple-darwin as a target --- build-aux/config.guess | 39 +++++++++++++++++++++++++++------------ build-aux/config.sub | 14 ++++++++------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 0fc11ed..f772702 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2020 Free Software Foundation, Inc. +# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2020-11-07' +timestamp='2021-01-01' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ timestamp='2020-11-07' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess # # Please send patches to . @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2020 Free Software Foundation, Inc. +Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -138,9 +138,7 @@ UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown case "$UNAME_SYSTEM" in Linux|GNU|GNU/*) - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - LIBC=gnu + LIBC=unknown set_cc_for_build cat <<-EOF > "$dummy.c" @@ -149,16 +147,30 @@ Linux|GNU|GNU/*) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc + #elif defined(__GLIBC__) + LIBC=gnu #else #include + /* First heuristic to detect musl libc. 
*/ #ifdef __DEFINED_va_list LIBC=musl - #else - LIBC=gnu #endif #endif EOF eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" + + # Second heuristic to detect musl libc. + if [ "$LIBC" = unknown ] && + command -v ldd >/dev/null && + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. + if [ "$LIBC" = unknown ]; then + LIBC=gnu + fi ;; esac @@ -984,6 +996,9 @@ EOF k1om:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; m32r*:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; @@ -1072,7 +1087,7 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-"$LIBC" exit ;; - riscv32:Linux:*:* | riscv64:Linux:*:*) + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) @@ -1638,9 +1653,9 @@ This script (version $timestamp), has failed to recognize the operating system you are using. If your script is old, overwrite *all* copies of config.guess and config.sub with the latest versions from: - https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess + https://git.savannah.gnu.org/cgit/config.git/plain/config.guess and - https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + https://git.savannah.gnu.org/cgit/config.git/plain/config.sub EOF year=$(echo $timestamp | sed 's,-.*,,') diff --git a/build-aux/config.sub b/build-aux/config.sub index c874b7a..b0f8492 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2020 Free Software Foundation, Inc. +# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2020-11-07' +timestamp='2021-01-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ timestamp='2020-11-07' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -67,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2020 Free Software Foundation, Inc. +Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -1185,6 +1185,7 @@ case $cpu-$vendor in | k1om \ | le32 | le64 \ | lm32 \ + | loongarch32 | loongarch64 | loongarchx32 \ | m32c | m32r | m32rle \ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ @@ -1229,7 +1230,7 @@ case $cpu-$vendor in | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \ | pru \ | pyramid \ - | riscv | riscv32 | riscv64 \ + | riscv | riscv32 | riscv32be | riscv64 | riscv64be \ | rl78 | romp | rs6000 | rx \ | s390 | s390x \ | score \ @@ -1241,6 +1242,7 @@ case $cpu-$vendor in | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ | spu \ | tahoe \ + | thumbv7* \ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ | tron \ | ubicom32 \ @@ -1685,7 +1687,7 @@ case $os in musl* | newlib* | uclibc*) ;; # Likewise for "kernel-libc" - eabi | eabihf | gnueabi | gnueabihf) + eabi* | gnueabi*) ;; # Now accept the basic system types. # The portable systems comes first. -- cgit v0.12 From a943172b732e65da34a19469f31cd3ec70cf05b0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Dec 2020 22:23:35 +0300 Subject: Add runtime detection for MADV_DONTNEED zeroes pages (mostly for qemu) qemu does not support this, yet [1], and you can get very tricky assert if you will run program with jemalloc in use under qemu: : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" [1]: https://patchwork.kernel.org/patch/10576637/ Here is a simple example that shows the problem [2]: // Gist to check possible issues with MADV_DONTNEED // For example it does not supported by qemu user // There is a patch for this [1], but it hasn't been applied. // [1]: https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg05422.html #include #include #include #include #include int main(int argc, char **argv) { void *addr = mmap(NULL, 1<<16, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); return 1; } memset(addr, 'A', 1<<16); if (!madvise(addr, 1<<16, MADV_DONTNEED)) { puts("MADV_DONTNEED does not return error. Check memory."); for (int i = 0; i < 1<<16; ++i) { assert(((unsigned char *)addr)[i] == 0); } } else { perror("madvise"); } if (munmap(addr, 1<<16)) { perror("munmap"); return 1; } return 0; } ### unpatched qemu $ qemu-x86_64-static /tmp/test-MADV_DONTNEED MADV_DONTNEED does not return error. Check memory. test-MADV_DONTNEED: /tmp/test-MADV_DONTNEED.c:19: main: Assertion `((unsigned char *)addr)[i] == 0' failed. qemu: uncaught target signal 6 (Aborted) - core dumped Aborted (core dumped) ### patched qemu (by returning ENOSYS error) $ qemu-x86_64 /tmp/test-MADV_DONTNEED madvise: Success ### patch for qemu to return ENOSYS diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 897d20c076..5540792e0e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11775,7 +11775,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, turns private file-backed mappings into anonymous mappings. This will break MADV_DONTNEED. This is a hint, so ignoring and returning success is ok. 
*/ - return 0; + return ENOSYS; #endif #ifdef TARGET_NR_fcntl64 case TARGET_NR_fcntl64: [2]: https://gist.github.com/azat/12ba2c825b710653ece34dba7f926ece v2: - review fixes - add opt_dont_trust_madvise v3: - review fixes - rename opt_dont_trust_madvise to opt_trust_madvise --- doc/jemalloc.xml.in | 12 ++++ .../jemalloc/internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 3 + src/jemalloc.c | 8 +++ src/pages.c | 71 +++++++++++++++++++++- test/unit/mallctl.c | 1 + 6 files changed, 94 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e24c191..4b93c5a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", is disabled. + + + opt.trust_madvise + (bool) + r- + + Do not perform runtime check for MADV_DONTNEED, to + check that it actually zeros pages. The default is + disabled on linux and enabled elsewhere. + + + opt.retain diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fb8dc3f..40591b9 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -11,6 +11,7 @@ extern bool malloc_slow; /* Run-time options. */ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; extern size_t opt_hpa_slab_max_alloc; diff --git a/src/ctl.c b/src/ctl.c index b94ef64..d516196 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, @@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0271415..f7c3963 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -94,6 +94,13 @@ bool opt_junk_free = false #endif ; +bool opt_trust_madvise = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + false +#else + true +#endif + ; zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { int i; bool match = false; diff --git a/src/pages.c b/src/pages.c index b23c9e9..6984d2a 100644 --- a/src/pages.c +++ b/src/pages.c @@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. 
*/ static bool pages_can_purge_lazy_runtime = true; +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS +static int madvise_dont_need_zeros_is_faulty = -1; +/** + * Check that MADV_DONTNEED will actually zero pages on subsequent access. + * + * Since qemu does not support this, yet [1], and you can get very tricky + * assert if you will run program with jemalloc in use under qemu: + * + * : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" + * + * [1]: https://patchwork.kernel.org/patch/10576637/ + */ +static int madvise_MADV_DONTNEED_zeroes_pages() +{ + int works = -1; + size_t size = PAGE; + + void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + if (addr == MAP_FAILED) { + malloc_write(": Cannot allocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + } + + memset(addr, 'A', size); + if (madvise(addr, size, MADV_DONTNEED) == 0) { + works = memchr(addr, 'A', size) == NULL; + } else { + /* + * If madvise() does not support MADV_DONTNEED, then we can + * call it anyway, and use it's return code. + */ + works = 1; + } + + if (munmap(addr, size) != 0) { + malloc_write(": Cannot deallocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + } + + return works; +} +#endif + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) { #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - return (madvise(addr, size, MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + madvise(addr, size, MADV_DONTNEED) != 0); #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) - return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. 
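With the detection function and the pages_purge_forced() guard above in place, the effective setting is visible through the mallctl plumbing added in ctl.c. A minimal standalone sketch of querying it, assuming a jemalloc build whose public API is unprefixed so that mallctl() is exported under that name:

/* check_trust_madvise.c: illustrative only; build with -ljemalloc. */
#include <stdbool.h>
#include <stdio.h>
#include <jemalloc/jemalloc.h>

int main(void) {
	bool trust;
	size_t sz = sizeof(trust);
	if (mallctl("opt.trust_madvise", &trust, &sz, NULL, 0) != 0) {
		fprintf(stderr, "opt.trust_madvise not available\n");
		return 1;
	}
	printf("opt.trust_madvise: %s\n", trust ? "true" : "false");
	return 0;
}

Starting the process with MALLOC_CONF=trust_madvise:true (the CONF_HANDLE_BOOL hookup above) skips the boot-time probe entirely and takes MADV_DONTNEED at its word.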
*/ return pages_commit(addr, size); @@ -642,6 +695,20 @@ pages_boot(void) { return true; } +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + if (!opt_trust_madvise) { + madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); + if (madvise_dont_need_zeros_is_faulty) { + malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n"); + malloc_write(": (This is the expected behaviour if you are running under QEMU)\n"); + } + } else { + /* In case opt_trust_madvise is disable, + * do not do runtime check */ + madvise_dont_need_zeros_is_faulty = 0; + } +#endif + #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 85dcb4e..6f5a8f1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); -- cgit v0.12 From f6699803e2772de2a4eb253d5b55f00c3842a950 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 25 Jan 2021 14:05:23 -0800 Subject: Fix duration in prof log --- src/prof_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 356a886..0f27a12 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -573,7 +573,7 @@ prof_log_emit_metadata(emitter_t *emitter) { nstime_t now; - nstime_init_update(&now); + nstime_prof_init_update(&now); uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); -- cgit v0.12 From 35a8552605be4fcbded961bf2dcbee5655401575 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Tue, 26 Jan 2021 21:49:08 +0000 Subject: Mac OS: Tag mapped pages. This can be used to help profiling tools (e.g. vmmap) identify the sources of mappings more specifically. --- configure.ac | 12 ++++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/pages.c | 9 +++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 53ac7cc..34613fe 100644 --- a/configure.ac +++ b/configure.ac @@ -926,6 +926,18 @@ if test "x${je_cv_cold}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) fi +dnl Check for VM_MAKE_TAG for mmap support. +JE_COMPILABLE([vm_make_tag], + [#include + #include ], + [void *p; + p = mmap(0, 16, PROT_READ, MAP_ANON|MAP_PRIVATE, VM_MAKE_TAG(1), 0); + munmap(p, 16);], + [je_cv_vm_make_tag]) +if test "x${je_cv_vm_make_tag}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index dc4f01f..093c8be 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -404,4 +404,7 @@ /* Performs additional size checks when defined. 
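The behavior that the configure probe above detects can be exercised directly: on Darwin, VM_MAKE_TAG() is passed where the file descriptor would normally go for an anonymous mapping, and inspection tools can then attribute the region to that tag. A Darwin-only sketch using an arbitrary application tag rather than the value jemalloc picks:

#include <sys/mman.h>
#include <mach/vm_statistics.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
	void *p = mmap(NULL, 1 << 20, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON,
	    VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("tagged mapping at %p (pid %d)\n", p, (int)getpid());
	pause();	/* keep the mapping alive so `vmmap <pid>` can show it */
	return 0;
}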
*/ #undef JEMALLOC_OPT_SIZE_CHECKS +/* Darwin VM_MAKE_TAG support */ +#undef JEMALLOC_HAVE_VM_MAKE_TAG + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/pages.c b/src/pages.c index 6984d2a..4261885 100644 --- a/src/pages.c +++ b/src/pages.c @@ -16,6 +16,11 @@ #ifdef __NetBSD__ #include /* ilog2 */ #endif +#ifdef JEMALLOC_HAVE_VM_MAKE_TAG +#define PAGES_FD_TAG VM_MAKE_TAG(101U) +#else +#define PAGES_FD_TAG -1 +#endif /******************************************************************************/ /* Data. */ @@ -141,7 +146,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { #endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - ret = mmap(addr, size, prot, mmap_flags, -1, 0); + ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0); } assert(ret != NULL); @@ -326,7 +331,7 @@ pages_commit_impl(void *addr, size_t size, bool commit) { { int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, - -1, 0); + PAGES_FD_TAG, 0); if (result == MAP_FAILED) { return true; } -- cgit v0.12 From c007c537ff038538b9312cf110bc5d395da14000 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 15:36:11 -0800 Subject: Tcache flush: Unify edata lookup path. --- src/tcache.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index ef0b87d..678fe52 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -236,18 +236,16 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, return ret; } -/* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, +tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, edata_t **edatas) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); /* - * Verify that the items in the tcache all have the correct size; this - * is useful for catching sized deallocation bugs, also to fail early - * instead of corrupting metadata. Since this can be turned on for opt - * builds, avoid the branch in the loop. + * This gets compiled away when config_opt_safety_checks is false. + * Checks for sized deallocation bugs, failing early rather than + * corrupting metadata. */ size_t szind_sum = binind * nflush; for (unsigned i = 0; i < nflush; i++) { @@ -258,9 +256,10 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_sum -= full_alloc_ctx.szind; } - if (szind_sum != 0) { + if (config_opt_safety_checks && szind_sum != 0) { safety_check_fail_sized_dealloc(false); } + } JEMALLOC_ALWAYS_INLINE bool @@ -306,17 +305,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], &ptrs, nflush); - /* Look up edata once per item. */ - if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd, &ptrs, binind, nflush, - item_edata); - } else { - for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, - cache_bin_ptr_array_get(&ptrs, i)); - } - } + tcache_bin_flush_edatas_lookup(tsd, &ptrs, binind, nflush, item_edata); /* * The slabs where we freed the last remaining object in the slab (and -- cgit v0.12 From 181ba7fd4d039a3acfc4d2b115be55d93ac8c406 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 16:10:37 -0800 Subject: Tcache flush: Add an emap "batch lookup" path. 
For now this is a no-op; but the interface is a little more flexible for our purposes. --- include/jemalloc/internal/emap.h | 33 +++++++++++++++++++++++++++++++++ src/tcache.c | 28 +++++++++++++++++----------- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8b2c6ba..f0d7e76 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -213,4 +213,37 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, return false; } +/* + * We want to do batch lookups out of the cache bins, which use + * cache_bin_ptr_array_get to access the i'th element of the bin (since they + * invert usual ordering in deciding what to flush). This lets the emap avoid + * caring about its caller's ordering. + */ +typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); +/* + * This allows size-checking assertions, which we can only do while we're in the + * process of edata lookups. + */ +typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); + +JEMALLOC_ALWAYS_INLINE void +emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, + emap_ptr_getter ptr_getter, void *ptr_getter_ctx, + emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx, + edata_t **r_edatas) { + + /* Avoids null-checking tsdn in the loop below. */ + util_assume(tsd != NULL); + + for (size_t i = 0; i < nptrs; i++) { + emap_full_alloc_ctx_t full_alloc_ctx; + const void *ptr = ptr_getter(ptr_getter_ctx, i); + + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), emap, ptr, + &full_alloc_ctx); + r_edatas[i] = full_alloc_ctx.edata; + metadata_visitor(metadata_visitor_ctx, &full_alloc_ctx); + } +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/tcache.c b/src/tcache.c index 678fe52..602823d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -236,11 +236,22 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, return ret; } +static const void * +tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { + cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; + return cache_bin_ptr_array_get(arr, (unsigned)ind); +} + +static void +tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, + emap_full_alloc_ctx_t *alloc_ctx) { + size_t *szind_sum = (size_t *)szind_sum_ctx; + *szind_sum -= alloc_ctx->szind; +} + static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, edata_t **edatas) { - /* Avoids null-checking tsdn in the loop below. */ - util_assume(tsd != NULL); /* * This gets compiled away when config_opt_safety_checks is false. @@ -248,18 +259,13 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, * corrupting metadata. 
*/ size_t szind_sum = binind * nflush; - for (unsigned i = 0; i < nflush; i++) { - emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, - cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); - edatas[i] = full_alloc_ctx.edata; - szind_sum -= full_alloc_ctx.szind; - } - + emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, + &tcache_bin_flush_ptr_getter, (void *)arr, + &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, + edatas); if (config_opt_safety_checks && szind_sum != 0) { safety_check_fail_sized_dealloc(false); } - } JEMALLOC_ALWAYS_INLINE bool -- cgit v0.12 From 9f9247a62ed5ac1157519cd2b1f966cacf772aaa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 17:14:38 -0800 Subject: Tcache fluhing: increase cache miss parallelism. In practice, many rtree_leaf_elm accesses are cache misses. By restructuring, we can make it more likely that these misses occur without blocking us from starting later lookups, taking more of those misses in parallel. --- include/jemalloc/internal/emap.h | 38 +++++++++++++++++++++++++++++++------- src/tcache.c | 12 ++++++------ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index f0d7e76..ac0050b 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -226,23 +226,47 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); */ typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); +typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t; +union emap_batch_lookup_result_u { + edata_t *edata; + rtree_leaf_elm_t *rtree_leaf; +}; + JEMALLOC_ALWAYS_INLINE void emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, emap_ptr_getter ptr_getter, void *ptr_getter_ctx, emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx, - edata_t **r_edatas) { - + emap_batch_lookup_result_t *result) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); + rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get(tsd); for (size_t i = 0; i < nptrs; i++) { - emap_full_alloc_ctx_t full_alloc_ctx; const void *ptr = ptr_getter(ptr_getter_ctx, i); + /* + * Reuse the edatas array as a temp buffer, lying a little about + * the types. + */ + result[i].rtree_leaf = rtree_leaf_elm_lookup(tsd_tsdn(tsd), + &emap->rtree, rtree_ctx, (uintptr_t)ptr, + /* dependent */ true, /* init_missing */ false); + } - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), emap, ptr, - &full_alloc_ctx); - r_edatas[i] = full_alloc_ctx.edata; - metadata_visitor(metadata_visitor_ctx, &full_alloc_ctx); + for (size_t i = 0; i < nptrs; i++) { + rtree_leaf_elm_t *elm = result[i].rtree_leaf; + rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd), + &emap->rtree, elm, /* dependent */ true); + result[i].edata = contents.edata; + emap_full_alloc_ctx_t alloc_ctx; + /* + * Not all these fields are read in practice by the metadata + * visitor. But the compiler can easily optimize away the ones + * that aren't, so no sense in being incomplete. 
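The restructuring is easier to see on a toy structure: issue every dependent load in a first pass and only consume the results in a second pass, so that several of the likely cache misses can be outstanding at once instead of serializing one full lookup per pointer. The types and names below are invented for illustration and are not the emap/rtree API:

#include <stddef.h>

typedef struct toy_leaf_s { unsigned szind; void *edata; } toy_leaf_t;
typedef struct toy_root_s { toy_leaf_t *slots[256]; } toy_root_t;

void
toy_lookup_batch(toy_root_t *root, const size_t *keys, size_t n,
    toy_leaf_t **scratch, void **out) {
	/* Pass 1: start the pointer chases (the likely misses). */
	for (size_t i = 0; i < n; i++) {
		scratch[i] = root->slots[keys[i] & 0xff];
	}
	/* Pass 2: read through the leaves once the loads are in flight. */
	for (size_t i = 0; i < n; i++) {
		out[i] = scratch[i]->edata;
	}
}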
+ */ + alloc_ctx.szind = contents.metadata.szind; + alloc_ctx.slab = contents.metadata.slab; + alloc_ctx.edata = contents.edata; + metadata_visitor(metadata_visitor_ctx, &alloc_ctx); } } diff --git a/src/tcache.c b/src/tcache.c index 602823d..635ba0b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -251,7 +251,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, - szind_t binind, size_t nflush, edata_t **edatas) { + szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { /* * This gets compiled away when config_opt_safety_checks is false. @@ -305,7 +305,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * Variable length array must have > 0 length; the last element is never * touched (it's just included to satisfy the no-zero-length rule). */ - VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], @@ -329,7 +329,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bool merged_stats = false; while (nflush > 0) { /* Lock the arena, or bin, associated with the first object. */ - edata_t *edata = item_edata[0]; + edata_t *edata = item_edata[0].edata; unsigned cur_arena_ind = edata_arena_ind_get(edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); @@ -382,7 +382,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (!small) { for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); - edata = item_edata[i]; + edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (tcache_bin_flush_match(edata, cur_arena_ind, @@ -400,7 +400,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); - edata = item_edata[i]; + edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, cur_binshard, small)) { @@ -411,7 +411,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * it can be handled in a future pass. */ cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); - item_edata[ndeferred] = edata; + item_edata[ndeferred].edata = edata; ndeferred++; continue; } -- cgit v0.12 From 31a629c3dea4c903d16025b4fe5261d2f3db8bd6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 28 Jan 2021 16:14:39 -0800 Subject: Tcache flush: prefetch edata contents. This frontloads more of the miss latency. It also moves it to a pathway where we have not yet acquired any locks, so that it should (hopefully) reduce hold times. --- include/jemalloc/internal/util.h | 49 ++++++++++++++++++++++++++++++++++++++++ src/tcache.c | 1 + 2 files changed, 50 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index cb75147..dcb1c0a 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -69,6 +69,55 @@ util_assume(bool b) { } } +/* ptr should be valid. */ +JEMALLOC_ALWAYS_INLINE void +util_prefetch_read(void *ptr) { + /* + * This should arguably be a config check; but any version of GCC so old + * that it doesn't support __builtin_prefetch is also too old to build + * jemalloc. 
+ */ +#ifdef __GNUC__ + if (config_debug) { + /* Enforce the "valid ptr" requirement. */ + *(volatile char *)ptr; + } + __builtin_prefetch(ptr, /* read or write */ 0, /* locality hint */ 3); +#else + *(volatile char *)ptr; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_write(void *ptr) { +#ifdef __GNUC__ + if (config_debug) { + *(volatile char *)ptr; + } + /* + * The only difference from the read variant is that this has a 1 as the + * second argument (the write hint). + */ + __builtin_prefetch(ptr, 1, 3); +#else + *(volatile char *)ptr; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_read_range(void *ptr, size_t sz) { + for (size_t i = 0; i < sz; i += CACHELINE) { + util_prefetch_read((void *)((uintptr_t)ptr + i)); + } +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_write_range(void *ptr, size_t sz) { + for (size_t i = 0; i < sz; i += CACHELINE) { + util_prefetch_write((void *)((uintptr_t)ptr + i)); + } +} + #undef UTIL_INLINE #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/tcache.c b/src/tcache.c index 635ba0b..3daf426 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -247,6 +247,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { size_t *szind_sum = (size_t *)szind_sum_ctx; *szind_sum -= alloc_ctx->szind; + util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); } static void -- cgit v0.12 From 229994a204f7d4712fe5ecd1508fbbe679c1baf6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 16:06:28 -0800 Subject: Tcache flush: keep common path state in registers. By carefully force-inlining the division constants and the operation sum count, we can eliminate redundant operations in the arena-level dalloc function. Do so. --- include/jemalloc/internal/arena_externs.h | 13 ++-- include/jemalloc/internal/arena_inlines_b.h | 93 +++++++++++++++++++++++++++++ src/arena.c | 87 +++++---------------------- src/tcache.c | 11 +++- test/unit/slab.c | 5 +- 5 files changed, 130 insertions(+), 79 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e3cfcee..f06cb34 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" @@ -13,6 +14,8 @@ extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; +extern div_info_t arena_binind_div_info[SC_NBINS]; + extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; @@ -29,9 +32,6 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); -#ifdef JEMALLOC_JET -size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); -#endif edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, @@ -59,8 +59,11 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void 
arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr); + +void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin); +void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index aaef45c..66dcff0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -441,4 +442,96 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; +struct arena_dalloc_bin_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +JEMALLOC_ALWAYS_INLINE size_t +arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % + (uintptr_t)bin_infos[binind].reg_size == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(&info->div_info, diff); + + assert(regind < bin_infos[binind].nregs); + + return regind; +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, + szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a arena_dalloc_bin_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = arena_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, + bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, + bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/src/arena.c b/src/arena.c index 914e63f..56c34af 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,7 +3,6 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/decay.h" -#include "jemalloc/internal/div.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" @@ -45,7 +44,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; -static div_info_t arena_binind_div_info[SC_NBINS]; +div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; @@ -260,44 +259,6 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, edata_nfree_sub(slab, cnt); } -#ifndef JEMALLOC_JET -static -#endif -size_t -arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(&arena_binind_div_info[binind], diff); - - assert(regind < bin_infos[binind].nregs); - - return regind; -} - -static void -arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { - szind_t binind = edata_szind_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(slab, binind, ptr); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. 
*/ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); -} - static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; @@ -1189,37 +1150,18 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { } } -/* Returns true if arena_slab_dalloc must be called on slab */ -static bool -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *slab, void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); - - bool ret = false; - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab_prepare(tsdn, slab, bin); - ret = true; - } else if (nfree == 1 && slab != bin->slabcur) { - arena_bin_slabs_full_remove(arena, bin, slab); - arena_bin_lower_slab(tsdn, arena, slab, bin); - } - - if (config_stats) { - bin->stats.ndalloc++; - bin->stats.curregs--; - } - - return ret; +void +arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_dissociate_bin_slab(arena, slab, bin); + arena_dalloc_bin_slab_prepare(tsdn, slab, bin); } -bool -arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, -szind_t binind, edata_t *edata, void *ptr) { - return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr); +void +arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_bin_slabs_full_remove(arena, bin, slab); + arena_bin_lower_slab(tsdn, arena, slab, bin); } static void @@ -1229,8 +1171,11 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr); + arena_dalloc_bin_locked_info_t info; + arena_dalloc_bin_locked_begin(&info, binind); + bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin, + &info, binind, edata, ptr); + arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { diff --git a/src/tcache.c b/src/tcache.c index 3daf426..c7bdbf9 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -399,6 +399,10 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, /* Deallocate whatever we can. 
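Stripped of the allocator specifics, the begin/step/finish split above follows a simple pattern: load the loop-invariant constants once, keep the running totals in a small local struct that the compiler can keep in registers, and write back to the shared counters a single time at the end. A generic sketch with illustrative names:

#include <stdint.h>

typedef struct batch_info_s {
	uint64_t scale;	/* loop-invariant, loaded once at begin */
	uint64_t total;	/* running count, never spilled to shared memory */
} batch_info_t;

static inline void
batch_begin(batch_info_t *info, uint64_t scale) {
	info->scale = scale;
	info->total = 0;
}

static inline void
batch_step(batch_info_t *info, uint64_t item) {
	info->total += item * info->scale;
}

static inline void
batch_finish(const batch_info_t *info, uint64_t *shared_counter) {
	/* One write-back instead of one per item. */
	*shared_counter += info->total;
}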
*/ unsigned ndeferred = 0; + arena_dalloc_bin_locked_info_t dalloc_bin_info; + if (small) { + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + } for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i].edata; @@ -417,8 +421,9 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, continue; } if (small) { - if (arena_dalloc_bin_locked(tsdn, cur_arena, - cur_bin, binind, edata, ptr)) { + if (arena_dalloc_bin_locked_step(tsdn, + cur_arena, cur_bin, &dalloc_bin_info, + binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; dalloc_count++; } @@ -432,6 +437,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } if (small) { + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); } arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); diff --git a/test/unit/slab.c b/test/unit/slab.c index 6baa9d3..70fc5c7 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -16,10 +16,13 @@ TEST_BEGIN(test_arena_slab_regind) { EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); + arena_dalloc_bin_locked_info_t dalloc_info; + arena_dalloc_bin_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); - expect_zu_eq(arena_slab_regind(&slab, binind, reg), + expect_zu_eq(arena_slab_regind(&dalloc_info, binind, + &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); -- cgit v0.12 From 4c46e11365566ec03723c46356cd524f4abd7fd8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 21:22:57 -0800 Subject: Cache an arena's index in the arena. This saves us a pointer hop down some perf-sensitive paths. --- include/jemalloc/internal/arena_inlines_a.h | 2 +- include/jemalloc/internal/arena_structs.h | 6 ++++++ src/arena.c | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index b83d0e8..8568358 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -3,7 +3,7 @@ static inline unsigned arena_ind_get(const arena_t *arena) { - return base_ind_get(arena->base); + return arena->ind; } static inline void diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index baa7031..913184d 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -84,6 +84,12 @@ struct arena_s { bins_t bins[SC_NBINS]; /* + * A cached copy of base->ind. This can get accessed on hot paths; + * looking it up in base requires an extra pointer hop / cache miss. + */ + unsigned ind; + + /* * Base allocator, from which arena metadata are allocated. * * Synchronization: internal. diff --git a/src/arena.c b/src/arena.c index 56c34af..7836e27 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1475,6 +1475,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->base = base; /* Set arena before creating background threads. */ arena_set(ind, arena); + arena->ind = ind; nstime_init_update(&arena->create_time); -- cgit v0.12 From 2fcbd18115c93fb4649d2861dd2e0d3351bf6f6f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 13:10:44 -0800 Subject: Cache bin: Don't reverse flush order. 
The items we pick to flush matter a lot, but the order in which they get flushed doesn't; just use forward scans. This simplifies the accessing code, both in terms of the C and the generated assembly (i.e. this speeds up the flush pathways). --- include/jemalloc/internal/cache_bin.h | 21 +++++---------------- src/tcache.c | 8 ++++---- test/unit/cache_bin.c | 2 +- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cf5ed3e..41942e9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -441,29 +441,18 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, bin->stack_head = empty_position - nfilled; } -/* Same deal, but with flush. */ +/* + * Same deal, but with flush. Unlike fill (which can fail), the user must flush + * everything we give them. + */ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { - arr->ptr = cache_bin_empty_position_get(bin) - 1; + arr->ptr = cache_bin_empty_position_get(bin) - nflush; assert(cache_bin_ncached_get_local(bin, info) == 0 || *arr->ptr != NULL); } -/* - * These accessors are used by the flush pathways -- they reverse ordinary array - * ordering. See the note above. - */ -JEMALLOC_ALWAYS_INLINE void * -cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { - return *(arr->ptr - n); -} - -JEMALLOC_ALWAYS_INLINE void -cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { - *(arr->ptr - n) = p; -} - static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { diff --git a/src/tcache.c b/src/tcache.c index c7bdbf9..a7337e7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -239,7 +239,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, static const void * tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; - return cache_bin_ptr_array_get(arr, (unsigned)ind); + return arr->ptr[ind]; } static void @@ -382,7 +382,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = cache_bin_ptr_array_get(&ptrs, i); + void *ptr = ptrs.ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); @@ -404,7 +404,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = cache_bin_ptr_array_get(&ptrs, i); + void *ptr = ptrs.ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -415,7 +415,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * arena. Either way, stash the object so that * it can be handled in a future pass. 
*/ - cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); + ptrs.ptr[ndeferred] = ptr; item_edata[ndeferred].edata = edata; ndeferred++; continue; diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index a69cad6..56e6901 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -43,7 +43,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); for (cache_bin_sz_t i = 0; i < nflush; i++) { - expect_ptr_eq(cache_bin_ptr_array_get(&arr, i), &ptrs[i], ""); + expect_ptr_eq(arr.ptr[i], &ptrs[nflush - i - 1], ""); } cache_bin_finish_flush(bin, info, &arr, nflush); -- cgit v0.12 From 39673298130bdeb95859c95fe314c0a1d7181329 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 30 Jan 2021 15:35:33 -0800 Subject: Arena: share bin offsets in a global. This saves us a cache miss when lookup up the arena bin offset in a remote arena during tcache flush. All arenas share the base offset, and so we don't need to look it up repeatedly for each arena. Secondarily, it shaves 288 bytes off the arena on, e.g., x86-64. --- include/jemalloc/internal/arena_externs.h | 6 +++ include/jemalloc/internal/arena_inlines_b.h | 6 +++ include/jemalloc/internal/arena_structs.h | 13 +++--- src/arena.c | 64 ++++++++++++----------------- src/ctl.c | 2 +- src/inspect.c | 4 +- src/tcache.c | 4 +- 7 files changed, 50 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f06cb34..360653f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -23,6 +23,12 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; +/* + * arena_bin_offsets[binind] is the offset of the first bin shard for size class + * binind. + */ +extern uint32_t arena_bin_offsets[SC_NBINS]; + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 66dcff0..318de11 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -534,4 +534,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +static inline bin_t * +arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { + bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]); + return shard0 + binshard; +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 913184d..4aff63c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -77,13 +77,6 @@ struct arena_s { pa_shard_t pa_shard; /* - * bins is used to store heaps of free regions. - * - * Synchronization: internal. - */ - bins_t bins[SC_NBINS]; - - /* * A cached copy of base->ind. This can get accessed on hot paths; * looking it up in base requires an extra pointer hop / cache miss. */ @@ -97,6 +90,12 @@ struct arena_s { base_t *base; /* Used to determine uptime. Read-only after initialization. 
*/ nstime_t create_time; + + /* + * The arena is allocated alongside its bins; really this is a + * dynamically sized array determined by the binshard settings. + */ + bin_t bins[0]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/src/arena.c b/src/arena.c index 7836e27..3448160 100644 --- a/src/arena.c +++ b/src/arena.c @@ -48,6 +48,10 @@ div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; + +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; + static unsigned huge_arena_ind; /******************************************************************************/ @@ -179,7 +183,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_stats_merge(tsdn, &bstats[i], - &arena->bins[i].bin_shards[j]); + arena_get_bin(arena, i, j)); } } } @@ -595,8 +599,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, - &arena->bins[i].bin_shards[j]); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -721,7 +724,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, if (binshard_p != NULL) { *binshard_p = binshard; } - return &arena->bins[binind].bin_shards[binshard]; + return arena_get_bin(arena, binind, binshard); } void @@ -1168,7 +1171,7 @@ static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t binind = edata_szind_get(edata); unsigned binshard = edata_binshard_get(edata); - bin_t *bin = &arena->bins[binind].bin_shards[binshard]; + bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; @@ -1411,10 +1414,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - unsigned nbins_total = 0; - for (i = 0; i < SC_NBINS; i++) { - nbins_total += bin_infos[i].n_shards; - } size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { @@ -1457,20 +1456,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Initialize bins. */ - uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (i = 0; i < SC_NBINS; i++) { - unsigned nshards = bin_infos[i].n_shards; - arena->bins[i].bin_shards = (bin_t *)bin_addr; - bin_addr += nshards * sizeof(bin_t); - for (unsigned j = 0; j < nshards; j++) { - bool err = bin_init(&arena->bins[i].bin_shards[j]); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + bool err = bin_init(&arena->bins[i]); + if (err) { + goto label_error; } } - assert(bin_addr == (uintptr_t)arena + arena_size); arena->base = base; /* Set arena before creating background threads. 
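The arena_boot() hunk just below fills in that shared offset table. Reduced to a toy, the layout is a fixed header plus a variable number of trailing bins, addressed through one process-wide offset table; the sizes and names here are illustrative, not jemalloc's:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define TOY_NCLASSES 3
static const unsigned toy_nshards[TOY_NCLASSES] = {2, 4, 1};

typedef struct toy_bin_s { unsigned nfree; } toy_bin_t;
typedef struct toy_arena_s {
	unsigned ind;
	toy_bin_t bins[];	/* trailing, dynamically sized */
} toy_arena_t;

static uint32_t toy_bin_offsets[TOY_NCLASSES];	/* shared by all arenas */

static void
toy_boot(void) {
	uint32_t off = (uint32_t)offsetof(toy_arena_t, bins);
	for (unsigned i = 0; i < TOY_NCLASSES; i++) {
		toy_bin_offsets[i] = off;
		off += toy_nshards[i] * (uint32_t)sizeof(toy_bin_t);
	}
}

static toy_bin_t *
toy_get_bin(toy_arena_t *a, unsigned cls, unsigned shard) {
	toy_bin_t *shard0 = (toy_bin_t *)((uintptr_t)a + toy_bin_offsets[cls]);
	return shard0 + shard;
}

int
main(void) {
	toy_boot();
	unsigned total = 0;
	for (unsigned i = 0; i < TOY_NCLASSES; i++) {
		total += toy_nshards[i];
	}
	toy_arena_t *a = calloc(1,
	    sizeof(toy_arena_t) + total * sizeof(toy_bin_t));
	if (a == NULL) {
		return 1;
	}
	toy_get_bin(a, 1, 3)->nfree = 7;	/* last shard of class 1 */
	free(a);
	return 0;
}

Because toy_bin_offsets lives once per process rather than once per arena, touching a remote arena during a flush does not have to pull in that arena's own copy of the table, which is the cache miss the commit message describes.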
*/ @@ -1587,6 +1579,13 @@ arena_boot(sc_data_t *sc_data) { div_init(&arena_binind_div_info[i], (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } + + uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins); + for (szind_t i = 0; i < SC_NBINS; i++) { + arena_bin_offsets[i] = cur_offset; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); + } } void @@ -1633,23 +1632,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_prefork(tsdn, &arena->bins[i]); } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_parent(tsdn, - &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_postfork_parent(tsdn, &arena->bins[i]); } + malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); pa_shard_postfork_parent(tsdn, &arena->pa_shard); @@ -1660,8 +1653,6 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) { @@ -1686,11 +1677,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_postfork_child(tsdn, &arena->bins[i]); } + malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); pa_shard_postfork_child(tsdn, &arena->pa_shard); diff --git a/src/ctl.c b/src/ctl.c index d516196..324925d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3423,7 +3423,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = &arena->bins[i].bin_shards[j]; + bin_t *bin = arena_get_bin(arena, i, j); MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/inspect.c b/src/inspect.c index 5e8d51d..911b5d5 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -52,11 +52,11 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(*nfree <= *nregs); assert(*nfree * edata_usize_get(edata) <= *size); - const arena_t *arena = (arena_t *)atomic_load_p( + arena_t *arena = (arena_t *)atomic_load_p( &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = edata_binshard_get(edata); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + bin_t *bin = arena_get_bin(arena, szind, binshard); malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) { diff --git a/src/tcache.c b/src/tcache.c index a7337e7..19e330a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -344,8 +344,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bin_t *cur_bin = NULL; if (small) { cur_binshard = edata_binshard_get(edata); - cur_bin = &cur_arena->bins[binind].bin_shards[ - cur_binshard]; + cur_bin = arena_get_bin(cur_arena, binind, + cur_binshard); assert(cur_binshard < 
bin_infos[binind].n_shards); } -- cgit v0.12 From 8edfc5b1700eab47d64d7cfa6a246ad88f832845 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 31 Jan 2021 11:55:45 -0800 Subject: Add ticker_geom_t. This lets a single ticker object drive events across a large number of different tick streams while sharing state. --- Makefile.in | 4 +- include/jemalloc/internal/ticker.h | 92 +++++++++++++++++++++- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ticker.c | 32 ++++++++ src/ticker.py | 15 ++++ test/unit/ticker.c | 29 ++++++- 9 files changed, 174 insertions(+), 6 deletions(-) create mode 100644 src/ticker.c create mode 100755 src/ticker.py diff --git a/Makefile.in b/Makefile.in index 3cb3161..40c4144 100644 --- a/Makefile.in +++ b/Makefile.in @@ -151,6 +151,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ + $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) @@ -188,7 +189,8 @@ ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := C_UTIL_CPP_SRCS := else -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \ + $(srcroot)src/ticker.c C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c endif TESTS_UNIT := \ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 52d0db4..6b51dde 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" /** @@ -10,11 +11,11 @@ * have occurred with a call to ticker_ticks), which will return true (and reset * the counter) if the countdown hit zero. */ - -typedef struct { +typedef struct ticker_s ticker_t; +struct ticker_s { int32_t tick; int32_t nticks; -} ticker_t; +}; static inline void ticker_init(ticker_t *ticker, int32_t nticks) { @@ -75,7 +76,7 @@ ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } -/* +/* * Try to tick. If ticker would fire, return true, but rely on * slowpath to reset ticker. */ @@ -88,4 +89,87 @@ ticker_trytick(ticker_t *ticker) { return false; } +/* + * The ticker_geom_t is much like the ticker_t, except that instead of ticker + * having a constant countdown, it has an approximate one; each tick has + * approximately a 1/nticks chance of triggering the count. + * + * The motivation is in triggering arena decay. With a naive strategy, each + * thread would maintain a ticker per arena, and check if decay is necessary + * each time that the arena's ticker fires. This has two costs: + * - Since under reasonable assumptions both threads and arenas can scale + * linearly with the number of CPUs, maintaining per-arena data in each thread + * scales quadratically with the number of CPUs. + * - These tickers are often a cache miss down tcache flush pathways. + * + * By giving each tick a 1/nticks chance of firing, we still maintain the same + * average number of ticks-until-firing per arena, with only a single ticker's + * worth of metadata. + */ + +/* See ticker.c for an explanation of these constants. 
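The claim just above, that giving each tick a 1/nticks chance of firing preserves the same average ticks-until-firing while collapsing all the per-arena tickers into one, is easy to check with a throwaway simulation. This uses rand() rather than jemalloc's PRNG or the precomputed table and is purely illustrative:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
	const int nticks = 1000;	/* desired average firing interval */
	const long total_ticks = 10 * 1000 * 1000;
	long fires = 0;
	srand(12345);
	for (long i = 0; i < total_ticks; i++) {
		if (rand() % nticks == 0) {	/* 1/nticks chance per tick */
			fires++;
		}
	}
	printf("mean ticks per fire: %.1f (target %d)\n",
	    fires > 0 ? (double)total_ticks / (double)fires : 0.0, nticks);
	return 0;
}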
*/ +#define TICKER_GEOM_NBITS 6 +#define TICKER_GEOM_MUL 61 +extern const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS]; + +/* Not actually any different from ticker_t; just for type safety. */ +typedef struct ticker_geom_s ticker_geom_t; +struct ticker_geom_s { + int32_t tick; + int32_t nticks; +}; + +/* + * Just pick the average delay for the first counter. We're more concerned with + * the behavior over long periods of time rather than the exact timing of the + * initial ticks. + */ +#define TICKER_GEOM_INIT(nticks) {nticks, nticks} + +static inline void +ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) { + /* + * Make sure there's no overflow possible. This shouldn't really be a + * problem for reasonable nticks choices, which are all static and + * relatively small. + */ + assert((uint64_t)nticks * (uint64_t)255 / (uint64_t)TICKER_GEOM_MUL + <= (uint64_t)INT32_MAX); + ticker->tick = nticks; + ticker->nticks = nticks; +} + +static inline int32_t +ticker_geom_read(const ticker_geom_t *ticker) { + return ticker->tick; +} + +/* Same deal as above. */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state) { + uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); + ticker->tick = (uint32_t)( + (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] + / (uint64_t)TICKER_GEOM_MUL); + return true; +} + +static inline bool +ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) { + ticker->tick -= nticks; + if (unlikely(ticker->tick < 0)) { + return ticker_geom_fixup(ticker, prng_state); + } + return false; +} + +static inline bool +ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) { + return ticker_geom_ticks(ticker, prng_state, 1); +} + #endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9443ac5..a93511d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -92,6 +92,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 3c4bff6..06460e5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -160,6 +160,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index fafb491..916460a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -92,6 +92,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 3c4bff6..06460e5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -160,6 +160,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ticker.c b/src/ticker.c new file mode 100644 index 0000000..790b5c2 --- /dev/null +++ b/src/ticker.c @@ -0,0 +1,32 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +/* + * To avoid using floating point math down core paths (still necessary because + * versions of the glibc dynamic loader that did not preserve xmm registers are + * still somewhat 
common, requiring us to be compilable with -mno-sse), and also + * to avoid generally expensive library calls, we use a precomputed table of + * values. We want to sample U uniformly on [0, 1], and then compute + * ceil(log(u)/log(1-1/nticks)). We're mostly interested in the case where + * nticks is reasonably big, so 1/log(1-1/nticks) is well-approximated by + * -nticks. + * + * To compute log(u), we sample an integer in [1, 64] and divide, then just look + * up results in a table. As a space-compression mechanism, we store these as + * uint8_t by dividing the range (255) by the highest-magnitude value the log + * can take on, and using that as a multiplier. We then have to divide by that + * multiplier at the end of the computation. + * + * The values here are computed in src/ticker.py + */ + +const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = { + 254, 211, 187, 169, 156, 144, 135, 127, + 120, 113, 107, 102, 97, 93, 89, 85, + 81, 77, 74, 71, 68, 65, 62, 60, + 57, 55, 53, 50, 48, 46, 44, 42, + 40, 39, 37, 35, 33, 32, 30, 29, + 27, 26, 24, 23, 21, 20, 19, 18, + 16, 15, 14, 13, 12, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 +}; diff --git a/src/ticker.py b/src/ticker.py new file mode 100755 index 0000000..3807740 --- /dev/null +++ b/src/ticker.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import math + +# Must match TICKER_GEOM_NBITS +lg_table_size = 6 +table_size = 2**lg_table_size +byte_max = 255 +mul = math.floor(-byte_max/math.log(1 / table_size)) +values = [round(-mul * math.log(i / table_size)) + for i in range(1, table_size+1)] +print("mul =", mul) +print("values:") +for i in range(table_size // 8): + print(", ".join((str(x) for x in values[i*8 : i*8 + 8]))) diff --git a/test/unit/ticker.c b/test/unit/ticker.c index 1cf10b0..0dd7786 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -64,10 +64,37 @@ TEST_BEGIN(test_ticker_copy) { } TEST_END +TEST_BEGIN(test_ticker_geom) { + const int32_t ticks = 100; + const uint64_t niters = 100 * 1000; + + ticker_geom_t ticker; + ticker_geom_init(&ticker, ticks); + uint64_t total_ticks = 0; + /* Just some random constant. */ + uint64_t prng_state = 0x343219f93496db9fULL; + for (uint64_t i = 0; i < niters; i++) { + while(!ticker_geom_tick(&ticker, &prng_state)) { + total_ticks++; + } + } + /* + * In fact, with this choice of random seed and the PRNG implementation + * used at the time this was tested, total_ticks is 95.1% of the + * expected ticks. + */ + expect_u64_ge(total_ticks , niters * ticks * 9 / 10, + "Mean off by > 10%%"); + expect_u64_le(total_ticks , niters * ticks * 11 / 10, + "Mean off by > 10%%"); +} +TEST_END + int main(void) { return test( test_ticker_tick, test_ticker_ticks, - test_ticker_copy); + test_ticker_copy, + test_ticker_geom); } -- cgit v0.12 From c259323ab3082324100c708109dbfff660d0f4b8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 31 Jan 2021 12:50:55 -0800 Subject: Use ticker_geom_t for arena tcache decay. 
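
As background for why a single shared ticker is enough (editorial sketch, not
part of any patch in this series): resetting the countdown to roughly
-nticks * log(u), with u uniform on (0, 1], keeps the mean ticks-until-firing
at about nticks, so one geometric ticker per thread can stand in for the old
per-(thread, arena) countdown tickers. The standalone C program below rebuilds
the lookup table the same way src/ticker.py does and simulates the
ticker_geom_fixup() reset rule; the file name, the xorshift64 PRNG, and its
seed are illustrative assumptions, not jemalloc code. The measured mean comes
out near 0.95 * nticks, matching the note in test/unit/ticker.c.

    /*
     * Standalone sketch: regenerate the geometric ticker table and verify
     * the average firing interval. Build: cc -O2 geom_sketch.c -lm
     */
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NBITS 6                     /* matches TICKER_GEOM_NBITS */
    #define TABLE_SIZE (1 << NBITS)

    int
    main(void) {
        /* mul = floor(-255 / log(1/table_size)); comes out to 61. */
        int mul = (int)floor(-255.0 / log(1.0 / TABLE_SIZE));
        uint8_t table[TABLE_SIZE];
        for (int i = 1; i <= TABLE_SIZE; i++) {
            table[i - 1] =
                (uint8_t)llround(-mul * log((double)i / TABLE_SIZE));
        }

        const int32_t nticks = 1000;
        uint64_t prng = 88172645463325252ULL; /* arbitrary xorshift64 seed */
        uint64_t nfires = 0;
        uint64_t total_ticks = 0;
        int32_t tick = nticks;
        for (uint64_t i = 0; i < 10 * 1000 * 1000; i++) {
            total_ticks++;
            if (--tick < 0) {
                /* Same reset rule as ticker_geom_fixup(). */
                prng ^= prng << 13;
                prng ^= prng >> 7;
                prng ^= prng << 17;
                uint64_t idx = prng & (TABLE_SIZE - 1);
                tick = (int32_t)((uint64_t)nticks * table[idx]
                    / (uint64_t)mul);
                nfires++;
            }
        }
        printf("mul = %d\n", mul);
        printf("mean ticks per fire = %.1f (expect roughly %.0f)\n",
            (double)total_ticks / (double)nfires, 0.95 * nticks);
        return 0;
    }
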
--- include/jemalloc/internal/arena_inlines_b.h | 22 +++--- include/jemalloc/internal/arena_structs.h | 5 -- include/jemalloc/internal/arena_types.h | 3 +- .../jemalloc/internal/jemalloc_internal_externs.h | 2 - .../internal/jemalloc_internal_inlines_a.h | 36 --------- include/jemalloc/internal/tsd.h | 9 +-- src/jemalloc.c | 90 ---------------------- src/tsd.c | 6 -- test/unit/arena_decay.c | 77 +++++++++--------- 9 files changed, 52 insertions(+), 198 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 318de11..13e6eb5 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -116,18 +116,22 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { - tsd_t *tsd; - ticker_t *decay_ticker; - if (unlikely(tsdn_null(tsdn))) { return; } - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); - if (unlikely(decay_ticker == NULL)) { - return; - } - if (unlikely(ticker_ticks(decay_ticker, nticks))) { + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * We use the ticker_geom_t to avoid having per-arena state in the tsd. + * Instead of having a countdown-until-decay timer running for every + * arena in every thread, we flip a coin once per tick, whose + * probability of coming up heads is 1/nticks; this is effectively the + * operation of the ticker_geom_t. Each arena has the same chance of a + * coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can + * use a single ticker for all of them. + */ + ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); + uint64_t *prng_state = tsd_prng_statep_get(tsd); + if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) { arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 4aff63c..ad76a79 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -98,9 +98,4 @@ struct arena_s { bin_t bins[0]; }; -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; -}; - #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index b13d8a0..e0f8218 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -7,11 +7,10 @@ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 40591b9..c78db06 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -56,12 +56,10 @@ void bootstrap_free(void *ptr); void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 25e5b50..24e42d3 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -56,31 +56,6 @@ percpu_arena_ind_limit(percpu_arena_mode_t mode) { } } -static inline arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { - arena_tdata_t *tdata; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - - if (unlikely(arenas_tdata == NULL)) { - /* arenas_tdata hasn't been initialized yet. */ - return arena_tdata_get_hard(tsd, ind); - } - if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or tdata to be - * initialized. - */ - return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : - NULL); - } - - tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) { - return tdata; - } - return arena_tdata_get_hard(tsd, ind); -} - static inline arena_t * arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -97,17 +72,6 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { return ret; } -static inline ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) { - arena_tdata_t *tdata; - - tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) { - return NULL; - } - return &tdata->decay_ticker; -} - JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { /* diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6076419..d22fdc9 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -58,9 +58,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* O(name, type, nullable type) */ #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ - O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ - O(narenas_tdata, uint32_t, uint32_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -77,7 +75,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ - O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ O(sec_shard, uint8_t, uint8_t) \ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ @@ -90,9 +88,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ 
TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* arenas_tdata_bypass */ false, \ /* reentrancy_level */ 0, \ - /* narenas_tdata */ 0, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ @@ -109,7 +105,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* prng_state */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ - /* arenas_tdata */ NULL, \ + /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ /* sec_shard */ (uint8_t)-1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index f7c3963..ca8a7de 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -493,82 +493,6 @@ arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { } } -arena_tdata_t * -arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_tdata_t *tdata, *arenas_tdata_old; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - unsigned narenas_tdata_old, i; - unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); - unsigned narenas_actual = narenas_total_get(); - - /* - * Dissociate old tdata array (and set up for deallocation upon return) - * if it's too small. - */ - if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { - arenas_tdata_old = arenas_tdata; - narenas_tdata_old = narenas_tdata; - arenas_tdata = NULL; - narenas_tdata = 0; - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); - } else { - arenas_tdata_old = NULL; - narenas_tdata_old = 0; - } - - /* Allocate tdata array if it's missing. */ - if (arenas_tdata == NULL) { - bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); - narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; - - if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { - *arenas_tdata_bypassp = true; - arenas_tdata = (arena_tdata_t *)a0malloc( - sizeof(arena_tdata_t) * narenas_tdata); - *arenas_tdata_bypassp = false; - } - if (arenas_tdata == NULL) { - tdata = NULL; - goto label_return; - } - assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); - } - - /* - * Copy to tdata array. It's possible that the actual number of arenas - * has increased since narenas_total_get() was called above, but that - * causes no correctness issues unless two threads concurrently execute - * the arenas.create mallctl, which we trust mallctl synchronization to - * prevent. - */ - - /* Copy/initialize tickers. */ - for (i = 0; i < narenas_actual; i++) { - if (i < narenas_tdata_old) { - ticker_copy(&arenas_tdata[i].decay_ticker, - &arenas_tdata_old[i].decay_ticker); - } else { - ticker_init(&arenas_tdata[i].decay_ticker, - DECAY_NTICKS_PER_UPDATE); - } - } - if (narenas_tdata > narenas_actual) { - memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) - * (narenas_tdata - narenas_actual)); - } - - /* Read the refreshed tdata array. */ - tdata = &arenas_tdata[ind]; -label_return: - if (arenas_tdata_old != NULL) { - a0dalloc(arenas_tdata_old); - } - return tdata; -} - /* Slow path, called only by arena_choose(). */ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { @@ -705,20 +629,6 @@ arena_cleanup(tsd_t *tsd) { } } -void -arenas_tdata_cleanup(tsd_t *tsd) { - arena_tdata_t *arenas_tdata; - - /* Prevent tsd->arenas_tdata from being (re)created. 
*/ - *tsd_arenas_tdata_bypassp_get(tsd) = true; - - arenas_tdata = tsd_arenas_tdata_get(tsd); - if (arenas_tdata != NULL) { - tsd_arenas_tdata_set(tsd, NULL); - a0dalloc(arenas_tdata); - } -} - static void stats_print_atexit(void) { if (config_stats) { diff --git a/src/tsd.c b/src/tsd.c index 0dd4036..6820eb6 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -251,8 +251,6 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { assert(!tsd_in_nominal_list(tsd)); assert(*tsd_arenap_get_unsafe(tsd) == NULL); assert(*tsd_iarenap_get_unsafe(tsd) == NULL); - assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); - assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL); assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false); assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL); } @@ -267,7 +265,6 @@ tsd_data_init_nocleanup(tsd_t *tsd) { * We set up tsd in a way that no cleanup is needed. */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); - *tsd_arenas_tdata_bypassp_get(tsd) = true; *tsd_tcache_enabledp_get_unsafe(tsd) = false; *tsd_reentrancy_levelp_get(tsd) = 1; tsd_prng_state_init(tsd); @@ -375,7 +372,6 @@ tsd_do_data_cleanup(tsd_t *tsd) { prof_tdata_cleanup(tsd); iarena_cleanup(tsd); arena_cleanup(tsd); - arenas_tdata_cleanup(tsd); tcache_cleanup(tsd); witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); *tsd_reentrancy_levelp_get(tsd) = 1; @@ -439,7 +435,6 @@ malloc_tsd_boot0(void) { return NULL; } tsd = tsd_fetch(); - *tsd_arenas_tdata_bypassp_get(tsd) = true; return tsd; } @@ -449,7 +444,6 @@ malloc_tsd_boot1(void) { tsd_t *tsd = tsd_fetch(); /* malloc_slow has been set properly. Update tsd_slow. */ tsd_slow_update(tsd); - *tsd_arenas_tdata_bypassp_get(tsd) = false; } #ifdef _WIN32 diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index a266168..cea39e0 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -187,7 +187,7 @@ TEST_BEGIN(test_decay_ticks) { test_skip_if(check_background_thread_enabled()); test_skip_if(opt_hpa); - ticker_t *decay_ticker; + ticker_geom_t *decay_ticker; unsigned tick0, tick1, arena_ind; size_t sz, large0; void *p; @@ -205,7 +205,7 @@ TEST_BEGIN(test_decay_ticks) { expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); - decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); + decay_ticker = tsd_arena_decay_tickerp_get(tsd_fetch()); expect_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); @@ -216,60 +216,60 @@ TEST_BEGIN(test_decay_ticks) { */ /* malloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = malloc(large0); expect_ptr_not_null(p, "Unexpected malloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); /* free(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); free(p); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); /* calloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = calloc(1, large0); expect_ptr_not_null(p, "Unexpected calloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); free(p); /* posix_memalign(). 
*/ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = aligned_alloc(sizeof(size_t), large0); expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); /* realloc(). */ /* Allocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = realloc(NULL, large0); expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Reallocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = realloc(p, large0); expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Deallocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); realloc(p, 0); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* @@ -286,41 +286,41 @@ TEST_BEGIN(test_decay_ticks) { sz = allocx_sizes[i]; /* mallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during mallocx() (sz=%zu)", sz); /* rallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = rallocx(p, sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during rallocx() (sz=%zu)", sz); /* xallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during xallocx() (sz=%zu)", sz); /* dallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during dallocx() (sz=%zu)", sz); /* sdallocx(). */ p = mallocx(sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); sdallocx(p, sz, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " "(sz=%zu)", sz); @@ -349,31 +349,24 @@ TEST_BEGIN(test_decay_ticks) { sz = tcache_sizes[i]; /* tcache fill. 
*/ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " "(sz=%zu)", sz); /* tcache flush. */ dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); /* Will only tick if it's in tcache. */ - if (sz <= tcache_max) { - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache " - "flush (sz=%zu)", sz); - } else { - expect_u32_eq(tick1, tick0, - "Unexpected ticker tick during tcache " - "flush (sz=%zu)", sz); - } + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush (sz=%zu)", sz); } } TEST_END @@ -401,7 +394,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, void *p = do_mallocx(1, flags); uint64_t dirty_npurge1, muzzy_npurge1; do { - for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; + for (unsigned i = 0; i < ARENA_DECAY_NTICKS_PER_UPDATE / 2; i++) { void *q = do_mallocx(1, flags); dallocx(q, flags); -- cgit v0.12 From 20140629b44f9a76241749b9c47e3905202d034c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Feb 2021 12:03:11 -0800 Subject: Bin: Move stats closer to the mutex. This is a slight cache locality optimization. --- include/jemalloc/internal/bin.h | 9 ++++++--- src/tcache.c | 7 +++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 9241ee7..63f9739 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -17,6 +17,12 @@ struct bin_s { malloc_mutex_t lock; /* + * Bin statistics. These get touched every time the lock is acquired, + * so put them close by in the hopes of getting some cache locality. + */ + bin_stats_t stats; + + /* * Current slab being used to service allocations of this bin's size * class. slabcur is independent of slabs_{nonfull,full}; whenever * slabcur is reassigned, the previous slab must be deallocated or @@ -33,9 +39,6 @@ struct bin_s { /* List used to track full slabs. */ edata_list_active_t slabs_full; - - /* Bin statistics. */ - bin_stats_t stats; }; /* A set of sharded bins of the same size class. */ diff --git a/src/tcache.c b/src/tcache.c index 19e330a..7c4047f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -347,6 +347,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); + /* + * If you're looking at profiles, you might think this + * is a good place to prefetch the bin stats, which are + * often a cache miss. This turns out not to be + * helpful on the workloads we've looked at, with moving + * the bin stats next to the lock seeming to do better. + */ } if (small) { -- cgit v0.12 From 3624dd42ffd88e63a8f7c2ee0a6ed3cbdfff81b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 28 Jan 2021 13:19:41 -0800 Subject: hpdata: Add a comment for hpdata_consistent. 
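
Editorial aside (not part of the patch): the invariant being documented is the
familiar cached-count-versus-bitmap pattern, in which a counter maintained
incrementally on the fast path must always equal the value recomputed from the
bitmap. A minimal standalone illustration of that pattern follows; the mini_*
names are hypothetical and exist only for this sketch.

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /*
     * Miniature of the hpdata idea: 64 "pages" tracked in one bitmap word,
     * plus a cached count of active pages. The consistency check recomputes
     * the count from the bitmap and compares it against the cache.
     */
    typedef struct {
        uint64_t active_bits;   /* bit i set <=> page i is active */
        size_t nactive;         /* cached popcount of active_bits */
    } mini_pages_t;

    static size_t
    mini_popcount(uint64_t x) {
        size_t n = 0;
        while (x != 0) {
            x &= x - 1;         /* clear the lowest set bit */
            n++;
        }
        return n;
    }

    static bool
    mini_pages_consistent(const mini_pages_t *p) {
        return mini_popcount(p->active_bits) == p->nactive;
    }

    static void
    mini_pages_reserve(mini_pages_t *p, size_t begin, size_t npages) {
        for (size_t i = begin; i < begin + npages; i++) {
            assert((p->active_bits & ((uint64_t)1 << i)) == 0);
            p->active_bits |= (uint64_t)1 << i;
        }
        p->nactive += npages;
        assert(mini_pages_consistent(p));
    }

    int
    main(void) {
        mini_pages_t p = {0, 0};
        mini_pages_reserve(&p, 3, 5);
        assert(p.nactive == 5);
        return 0;
    }

hpdata_consistent() plays the role of mini_pages_consistent() here; the rest
of the series calls it through hpdata_assert_consistent() around each state
transition in src/hpdata.c.
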
--- include/jemalloc/internal/hpdata.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index fdd6673..65cd073 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -112,6 +112,11 @@ hpdata_assert_empty(hpdata_t *hpdata) { assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); } +/* + * Only used in tests, and in hpdata_assert_consistent, below. Verifies some + * consistency properties of the hpdata (e.g. that cached counts of page stats + * match computed ones). + */ static inline bool hpdata_consistent(hpdata_t *hpdata) { if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) -- cgit v0.12 From ff4086aa6b9b957409ccdc6d818490154decd343 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 17:01:57 -0800 Subject: hpdata: count active pages instead of free ones. This will be more consistent with later naming choices. --- include/jemalloc/internal/hpdata.h | 29 ++++++++++++----------------- src/hpa.c | 2 +- src/hpdata.c | 6 +++--- src/psset.c | 4 ++-- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 65cd073..7cefb5c 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -44,11 +44,12 @@ struct hpdata_s { ql_elm(hpdata_t) ql_link; }; - /* Number of currently free pages (regardless of contiguity). */ - size_t h_nfree; /* The length of the largest contiguous sequence of inactive pages. */ size_t h_longest_free_range; + /* Number of active pages. */ + size_t h_nactive; + /* A bitmap with bits set in the active pages. */ fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; @@ -85,17 +86,6 @@ hpdata_huge_set(hpdata_t *hpdata, bool huge) { } static inline size_t -hpdata_nfree_get(const hpdata_t *hpdata) { - return hpdata->h_nfree; -} - -static inline void -hpdata_nfree_set(hpdata_t *hpdata, size_t nfree) { - assert(nfree <= HUGEPAGE_PAGES); - hpdata->h_nfree = nfree; -} - -static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; } @@ -106,10 +96,15 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { hpdata->h_longest_free_range = longest_free_range; } +static inline size_t +hpdata_nactive_get(hpdata_t *hpdata) { + return hpdata->h_nactive; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); - assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); + assert(hpdata->h_nactive == 0); } /* @@ -123,8 +118,8 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata_longest_free_range_get(hpdata)) { return false; } - if (fb_ucount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata_nfree_get(hpdata)) { + if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_nactive) { return false; } return true; @@ -142,7 +137,7 @@ ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); static inline bool hpdata_empty(hpdata_t *hpdata) { - return hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES; + return hpdata->h_nactive == 0; } void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); diff --git a/src/hpa.c b/src/hpa.c index a51f83c..8bbe8a8 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -125,7 +125,7 @@ hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { * inactive. Eventually, this should be a malloc conf option. 
*/ return !hpdata_huge_get(ps) - && hpdata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; + && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; } /* Returns true on error. */ diff --git a/src/hpdata.c b/src/hpdata.c index a242efe..d513896 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,7 +22,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata_huge_set(hpdata, false); - hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata->h_nactive = 0; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -72,7 +72,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { /* We found a range; remember it. */ result = begin; fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); - hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) - npages); + hpdata->h_nactive += npages; /* * We might have shrunk the longest free range. We have to keep @@ -123,7 +123,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_longest_free_range_set(hpdata, new_range_len); } - hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); + hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); } diff --git a/src/psset.c b/src/psset.c index 7a5bd60..9fcdac2 100644 --- a/src/psset.c +++ b/src/psset.c @@ -57,8 +57,8 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, size_t *ninactive_dst = hpdata_huge_get(ps) ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - size_t ninactive = hpdata_nfree_get(ps); - size_t nactive = HUGEPAGE_PAGES - ninactive; + size_t nactive = hpdata_nactive_get(ps); + size_t ninactive = HUGEPAGE_PAGES - nactive; size_t mul = insert ? (size_t)1 : (size_t)-1; *npageslabs_dst += mul * 1; -- cgit v0.12 From 2ae966222f071929dd124d2953b35ca16feb2ba0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 17:56:58 -0800 Subject: hpdata: track per-page dirty state. --- include/jemalloc/internal/hpdata.h | 42 ++++++++++++++++++++++++++++++----- src/hpa.c | 7 ++++-- src/hpdata.c | 45 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7cefb5c..5952a18 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -52,6 +52,16 @@ struct hpdata_s { /* A bitmap with bits set in the active pages. */ fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + + /* + * Number of dirty pages, and a bitmap tracking them. This really means + * "dirty" from the OS's point of view; it includes both active and + * inactive pages that have been touched by the user. + */ + size_t h_ndirty; + + /* The dirty pages (using the same definition as above). 
*/ + fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; static inline void * @@ -80,11 +90,6 @@ hpdata_huge_get(const hpdata_t *hpdata) { return hpdata->h_huge; } -static inline void -hpdata_huge_set(hpdata_t *hpdata, bool huge) { - hpdata->h_huge = huge; -} - static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -122,6 +127,16 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata->h_nactive) { return false; } + if (fb_scount(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_ndirty) { + return false; + } + if (hpdata->h_ndirty < hpdata->h_nactive) { + return false; + } + if (hpdata->h_huge && hpdata->h_ndirty != HUGEPAGE_PAGES) { + return false; + } return true; } @@ -141,6 +156,7 @@ hpdata_empty(hpdata_t *hpdata) { } void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); + /* * Given an hpdata which can serve an allocation request, pick and reserve an * offset within that allocation. @@ -148,4 +164,20 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); +/* + * Tell the hpdata that it's now a hugepage (which, correspondingly, means that + * all its pages become dirty. + */ +void hpdata_hugify(hpdata_t *hpdata); +/* + * Tell the hpdata that it's no longer a hugepage (all its pages are still + * counted as dirty, though; an explicit purge call is required to change that). + */ +void hpdata_dehugify(hpdata_t *hpdata); +/* + * Tell the hpdata (which should be empty) that all dirty pages in it have been + * purged. + */ +void hpdata_purge(hpdata_t *hpdata); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpa.c b/src/hpa.c index 8bbe8a8..7563604 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -146,7 +146,10 @@ hpa_dehugify(hpdata_t *ps) { /* Purge, then dehugify while unbacked. */ pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - hpdata_huge_set(ps, false); + + /* Update metadata. */ + hpdata_dehugify(ps); + hpdata_purge(ps); } static hpdata_t * @@ -297,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - hpdata_huge_set(ps, true); + hpdata_hugify(ps); } psset_insert(&shard->psset, ps); diff --git a/src/hpdata.c b/src/hpdata.c index d513896..8297158 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -21,10 +21,12 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); - hpdata_huge_set(hpdata, false); - hpdata->h_nactive = 0; + hpdata->h_huge = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); + hpdata->h_ndirty = 0; + fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); hpdata_assert_consistent(hpdata); } @@ -75,6 +77,15 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->h_nactive += npages; /* + * We might be about to dirty some memory for the first time; update our + * count if so. + */ + size_t new_dirty = fb_ucount(hpdata->dirty_pages, HUGEPAGE_PAGES, + result, npages); + fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, result, npages); + hpdata->h_ndirty += new_dirty; + + /* * We might have shrunk the longest free range. We have to keep * scanning until the end of the hpdata to be sure. 
* @@ -127,3 +138,33 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); } + +void +hpdata_hugify(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + hpdata->h_huge = true; + fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ndirty = HUGEPAGE_PAGES; + hpdata_assert_consistent(hpdata); +} + +void +hpdata_dehugify(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + hpdata->h_huge = false; + hpdata_assert_consistent(hpdata); +} + +void +hpdata_purge(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + /* + * The hpdata must be empty; we don't (yet) support partial purges of + * hugepages. + */ + assert(hpdata->h_nactive == 0); + fb_unset_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->h_ndirty = 0; + hpdata_assert_consistent(hpdata); +} -- cgit v0.12 From 9b75808be171cc7c586e32ddb9d5dd86eca38669 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 19:06:32 -0800 Subject: flat bitmap: Add a bitwise and/or/not. We're about to need them. --- include/jemalloc/internal/flat_bitmap.h | 30 ++++++++ test/unit/flat_bitmap.c | 132 +++++++++++++++++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index c8cf518..90c4091 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -340,4 +340,34 @@ fb_urange_longest(fb_group_t *fb, size_t nbits) { return fb_range_longest_impl(fb, nbits, /* val */ false); } +/* + * Initializes each bit of dst with the bitwise-AND of the corresponding bits of + * src1 and src2. All bitmaps must be the same size. + */ +static inline void +fb_bit_and(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] & src2[i]; + } +} + +/* Like fb_bit_and, but with bitwise-OR. */ +static inline void +fb_bit_or(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] | src2[i]; + } +} + +/* Initializes dst bit i to the negation of source bit i. */ +static inline void +fb_bit_not(fb_group_t *dst, fb_group_t *src, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = ~src[i]; + } +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index f088379..6b0bcc3 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -807,6 +807,133 @@ TEST_BEGIN(test_count_alternating) { } TEST_END +static void +do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), + void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb1 = malloc(sz); + fb_group_t *fb2 = malloc(sz); + fb_group_t *fb_result = malloc(sz); + fb_init(fb1, nbits); + fb_init(fb2, nbits); + fb_init(fb_result, nbits); + + /* Just two random numbers. 
*/ + const uint64_t prng_init1 = (uint64_t)0X4E9A9DE6A35691CDULL; + const uint64_t prng_init2 = (uint64_t)0X7856E396B063C36EULL; + + uint64_t prng1 = prng_init1; + uint64_t prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + if (bit1) { + fb_set(fb1, nbits, i); + } + if (bit2) { + fb_set(fb2, nbits, i); + } + + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + fb_op(fb_result, fb1, fb2, nbits); + + /* Reset the prngs to replay them. */ + prng1 = prng_init1; + prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + /* Original bitmaps shouldn't change. */ + expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); + expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); + + /* New one should be bitwise and. */ + expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), + "difference at bit %zu", i); + + /* Update the same way we did last time. */ + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + free(fb1); + free(fb2); + free(fb_result); +} + +static bool +binary_and(bool a, bool b) { + return a & b; +} + +static void +do_test_bit_and(size_t nbits) { + do_test_bit_op(nbits, &binary_and, &fb_bit_and); +} + +TEST_BEGIN(test_bit_and) { +#define NB(nbits) \ + do_test_bit_and(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_or(bool a, bool b) { + return a | b; +} + +static void +do_test_bit_or(size_t nbits) { + do_test_bit_op(nbits, &binary_or, &fb_bit_or); +} + +TEST_BEGIN(test_bit_or) { +#define NB(nbits) \ + do_test_bit_or(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_not(bool a, bool b) { + (void)b; + return !a; +} + +static void +fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, + size_t nbits) { + (void)src2; + fb_bit_not(dst, src1, nbits); +} + +static void +do_test_bit_not(size_t nbits) { + do_test_bit_op(nbits, &binary_not, &fb_bit_not_shim); +} + +TEST_BEGIN(test_bit_not) { +#define NB(nbits) \ + do_test_bit_not(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -820,5 +947,8 @@ main(void) { test_iter_range_exhaustive, test_count_contiguous_simple, test_count_contiguous, - test_count_alternating); + test_count_alternating, + test_bit_and, + test_bit_or, + test_bit_not); } -- cgit v0.12 From 70692cfb13332678af49f9d3c7bfe1fde65ec1aa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 18:44:34 -0800 Subject: hpdata: Add state changing helpers. We're about to allow hugepage subextent purging; get as much of our metadata handling ready as possible. --- include/jemalloc/internal/hpdata.h | 96 +++++++++++++++++++++++++--- src/hpa.c | 5 +- src/hpdata.c | 112 +++++++++++++++++++++++++++++++- test/unit/hpdata.c | 127 ++++++++++++++++++++++++++++++++++++- 4 files changed, 329 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 5952a18..faa6243 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -34,6 +34,16 @@ struct hpdata_s { uint64_t h_age; /* Whether or not we think the hugepage is mapped that way by the OS. */ bool h_huge; + + /* + * Whether or not some thread is purging this hpdata (i.e. 
has called + * hpdata_purge_begin but not yet called hpdata_purge_end), or + * hugifying it. Only one thread at a time is allowed to change a + * hugepage's state. + */ + bool h_mid_purge; + bool h_mid_hugify; + union { /* When nonempty, used by the psset bins. */ phn(hpdata_t) ph_link; @@ -90,6 +100,22 @@ hpdata_huge_get(const hpdata_t *hpdata) { return hpdata->h_huge; } +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + +static inline bool +hpdata_mid_purge_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge; +} + +static inline bool +hpdata_mid_hugify_get(const hpdata_t *hpdata) { + return hpdata->h_mid_hugify; +} + + static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -106,6 +132,11 @@ hpdata_nactive_get(hpdata_t *hpdata) { return hpdata->h_nactive; } +static inline size_t +hpdata_ndirty_get(hpdata_t *hpdata) { + return hpdata->h_ndirty; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -165,19 +196,68 @@ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); /* - * Tell the hpdata that it's now a hugepage (which, correspondingly, means that - * all its pages become dirty. + * Tell the hpdata (which should be empty) that all dirty pages in it have been + * purged. + */ +void hpdata_purge(hpdata_t *hpdata); + +/* + * The hpdata_purge_prepare_t allows grabbing the metadata required to purge + * subranges of a hugepage while holding a lock, drop the lock during the actual + * purging of them, and reacquire it to update the metadata again. + */ +typedef struct hpdata_purge_state_s hpdata_purge_state_t; +struct hpdata_purge_state_s { + size_t npurged; + fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; + size_t next_purge_search_begin; +}; + +/* + * Initializes purge state. The access to hpdata must be externally + * synchronized with other hpdata_* calls. + * + * You can tell whether or not a thread is purging or hugifying a given hpdata + * via hpdata_changing_state_get(hpdata). Racing hugification or purging + * operations aren't allowed. + * + * Once you begin purging, you have to follow through and call hpdata_purge_next + * until you're done, and then end. Allocating out of an hpdata undergoing + * purging is not allowed. + */ +void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +/* + * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to + * true, and returns true. Otherwise, returns false to indicate that we're + * done. + * + * This requires exclusive access to the purge state, but *not* to the hpdata. + * In particular, unreserve calls are allowed while purging (i.e. you can dalloc + * into one part of the hpdata while purging a different part). + */ +bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size); +/* + * Updates the hpdata metadata after all purging is done. Needs external + * synchronization. + */ +void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); + +/* + * Similarly, when hugifying , callers can do the metadata modifications while + * holding a lock (thereby setting the change_state field), but actually do the + * operation without blocking other threads. 
*/ -void hpdata_hugify(hpdata_t *hpdata); +void hpdata_hugify_begin(hpdata_t *hpdata); +void hpdata_hugify_end(hpdata_t *hpdata); + /* * Tell the hpdata that it's no longer a hugepage (all its pages are still * counted as dirty, though; an explicit purge call is required to change that). + * + * This should only be done after starting to purge, and before actually purging + * any contents. */ void hpdata_dehugify(hpdata_t *hpdata); -/* - * Tell the hpdata (which should be empty) that all dirty pages in it have been - * purged. - */ -void hpdata_purge(hpdata_t *hpdata); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpa.c b/src/hpa.c index 7563604..a36eee4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -300,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - hpdata_hugify(ps); + hpdata_hugify_begin(ps); } psset_insert(&shard->psset, ps); @@ -319,6 +319,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * operations in this hpa shard. */ hpa_hugify(ps); + malloc_mutex_lock(tsdn, &shard->mtx); + hpdata_hugify_end(ps); + malloc_mutex_unlock(tsdn, &shard->mtx); } return edata; } diff --git a/src/hpdata.c b/src/hpdata.c index 8297158..29aecff 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_mid_purge = false; + hpdata->h_mid_hugify = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -140,8 +142,97 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } void -hpdata_hugify(hpdata_t *hpdata) { +hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = true; + + purge_state->npurged = 0; + purge_state->next_purge_search_begin = 0; + + /* + * Initialize to_purge with everything that's not active but that is + * dirty. + * + * As an optimization, we could note that in practice we never allocate + * out of a hugepage while purging within it, and so could try to + * combine dirty extents separated by a non-dirty but non-active extent + * to avoid purge calls. This does nontrivially complicate metadata + * tracking though, so let's hold off for now. + */ + fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); + fb_bit_and(purge_state->to_purge, purge_state->to_purge, + hpdata->dirty_pages, HUGEPAGE_PAGES); + + /* We purge everything we can. */ + assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount( + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + hpdata_assert_consistent(hpdata); +} + +bool +hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size) { + /* + * Note that we don't have a consistency check here; we're accessing + * hpdata without synchronization, and therefore have no right to expect + * a consistent state. + */ + assert(hpdata->h_mid_purge); + /* Should have dehugified already (if necessary). 
*/ + assert(!hpdata->h_huge); + assert(!hpdata->h_mid_hugify); + + if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { + return false; + } + size_t purge_begin; + size_t purge_len; + bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES, + purge_state->next_purge_search_begin, &purge_begin, &purge_len); + if (!found_range) { + return false; + } + + *r_purge_addr = (void *)( + (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); + *r_purge_size = purge_len * PAGE; + + purge_state->next_purge_search_begin = purge_begin + purge_len; + purge_state->npurged += purge_len; + assert(purge_state->npurged <= HUGEPAGE_PAGES); + + return true; +} + +void +hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { + hpdata_assert_consistent(hpdata); + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = false; + + assert(purge_state->npurged == fb_scount(purge_state->to_purge, + HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + fb_bit_not(purge_state->to_purge, purge_state->to_purge, + HUGEPAGE_PAGES); + fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages, + purge_state->to_purge, HUGEPAGE_PAGES); + assert(hpdata->h_ndirty >= purge_state->npurged); + hpdata->h_ndirty -= purge_state->npurged; + + hpdata_assert_consistent(hpdata); +} + +void +hpdata_hugify_begin(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ndirty = HUGEPAGE_PAGES; @@ -149,8 +240,27 @@ hpdata_hugify(hpdata_t *hpdata) { } void +hpdata_hugify_end(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(hpdata->h_mid_hugify); + hpdata->h_mid_hugify = false; + hpdata_assert_consistent(hpdata); +} + +void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + /* + * These asserts are morally right; for now, though, we have the "purge a + * hugepage only in its entirety, when it becomes empty", path sharing + * hpdata_dehugify with the new purge pathway coming in the next + * commit. + */ + /* + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + */ hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 1bf58bc..2fd9a36 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -55,7 +55,132 @@ TEST_BEGIN(test_reserve_alloc) { } TEST_END +TEST_BEGIN(test_purge_simple) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. 
*/ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + void *purge_addr; + size_t purge_size; + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_end(&hpdata, &purge_state); + expect_false(hpdata_changing_state_get(&hpdata), ""); + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +/* + * We only test intervening dalloc's not intervening allocs; we don't need + * intervening allocs, and foreseeable optimizations will make them not just + * unnecessary but incorrect. In particular, if there are two dirty extents + * separated only by a retained extent, we can just purge the entire range, + * saving a purge call. + */ +TEST_BEGIN(test_purge_intervening_dalloc) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocate the first 3/4 of the pages. */ + void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Free the first 1/4 and the third 1/4 of the pages. */ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + void *purge_addr; + size_t purge_size; + /* First purge. */ + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + /* Deallocate the second 1/4 before the second purge occurs. */ + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 1 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + /* Now continue purging. 
*/ + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq( + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +TEST_BEGIN(test_hugify) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_begin(&hpdata); + expect_true(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_end(&hpdata); + expect_false(hpdata_changing_state_get(&hpdata), ""); + + /* Hugeifying should have increased the dirty page count. */ + expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), ""); +} +TEST_END + int main(void) { return test_no_reentrancy( - test_reserve_alloc); + test_reserve_alloc, + test_purge_simple, + test_purge_intervening_dalloc, + test_hugify); } -- cgit v0.12 From 30b9e8162b9127d5c352fc312dfdea5e07d51e56 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 22:24:15 -0800 Subject: HPA: Generalize purging. Previously, we would purge a hugepage only when it's completely empty. With this change, we can purge even when only partially empty. Although the heuristic here is still fairly primitive, this infrastructure can scale to become more advanced. --- include/jemalloc/internal/hpdata.h | 23 +++-- src/hpa.c | 206 ++++++++++++++++++++++++++++++------- src/hpdata.c | 35 +++---- src/psset.c | 5 + test/unit/hpdata.c | 1 + 5 files changed, 207 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index faa6243..66473d2 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -44,6 +44,9 @@ struct hpdata_s { bool h_mid_purge; bool h_mid_hugify; + /* Whether or not the hpdata is a the psset. */ + bool h_in_psset; + union { /* When nonempty, used by the psset bins. */ phn(hpdata_t) ph_link; @@ -115,6 +118,15 @@ hpdata_mid_hugify_get(const hpdata_t *hpdata) { return hpdata->h_mid_hugify; } +static inline bool +hpdata_in_psset_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset; +} + +static inline void +hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { + hpdata->h_in_psset = in_psset; +} static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { @@ -196,12 +208,6 @@ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); /* - * Tell the hpdata (which should be empty) that all dirty pages in it have been - * purged. - */ -void hpdata_purge(hpdata_t *hpdata); - -/* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge * subranges of a hugepage while holding a lock, drop the lock during the actual * purging of them, and reacquire it to update the metadata again. 
@@ -247,6 +253,11 @@ void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); * Similarly, when hugifying , callers can do the metadata modifications while * holding a lock (thereby setting the change_state field), but actually do the * operation without blocking other threads. + * + * Unlike most metadata operations, hugification ending should happen while an + * hpdata is in the psset (or upcoming hugepage collections). This is because + * while purge/use races are unsafe, purge/hugepageify races are perfectly + * reasonable. */ void hpdata_hugify_begin(hpdata_t *hpdata); void hpdata_hugify_end(hpdata_t *hpdata); diff --git a/src/hpa.c b/src/hpa.c index a36eee4..9959454 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -124,32 +124,26 @@ hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. */ + if (hpdata_changing_state_get(ps)) { + return false; + } return !hpdata_huge_get(ps) && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; } -/* Returns true on error. */ -static void -hpa_hugify(hpdata_t *ps) { - assert(hpdata_huge_get(ps)); - bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); - /* - * Eat the error; even if the hugification failed, it's still safe to - * pretend it didn't (and would require extraordinary measures to - * unhugify). - */ - (void)err; -} - -static void -hpa_dehugify(hpdata_t *ps) { - /* Purge, then dehugify while unbacked. */ - pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); - pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - - /* Update metadata. */ - hpdata_dehugify(ps); - hpdata_purge(ps); +/* + * Whether or not the given pageslab meets the criteria for being purged (and, + * if necessary, dehugified). + */ +static bool +hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { + /* Ditto. */ + if (hpdata_changing_state_get(ps)) { + return false; + } + size_t purgeable = hpdata_ndirty_get(ps) - hpdata_nactive_get(ps); + return purgeable > HUGEPAGE_PAGES * 25 / 100 + || (purgeable > 0 && hpdata_empty(ps)); } static hpdata_t * @@ -226,9 +220,65 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { } /* - * The psset does not hold empty slabs. Upon becoming empty, then, we need to - * put them somewhere. We take this as an opportunity to purge, and retain - * their address space in a list outside the psset. + * As a precondition, ps should not be in the psset (we can handle deallocation + * races, but not allocation ones), and we should hold the shard mutex. + */ +static void +hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + while (hpa_should_purge(shard, ps)) { + /* Do the metadata update bit while holding the lock. */ + hpdata_purge_state_t purge_state; + hpdata_purge_begin(ps, &purge_state); + + /* + * Dehugifying can only happen on the first loop iteration, + * since no other threads can allocate out of this ps while + * we're purging (and thus, can't hugify it), but there's not a + * natural way to express that in the control flow. + */ + bool needs_dehugify = false; + if (hpdata_huge_get(ps)) { + needs_dehugify = true; + hpdata_dehugify(ps); + } + + /* Drop the lock to do the OS calls. 
*/ + malloc_mutex_unlock(tsdn, &shard->mtx); + + if (needs_dehugify) { + pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); + } + + size_t total_purged = 0; + void *purge_addr; + size_t purge_size; + while (hpdata_purge_next(ps, &purge_state, &purge_addr, + &purge_size)) { + pages_purge_forced(purge_addr, purge_size); + total_purged += purge_size; + } + + /* Reacquire to finish our metadata update. */ + malloc_mutex_lock(tsdn, &shard->mtx); + hpdata_purge_end(ps, &purge_state); + + assert(total_purged <= HUGEPAGE); + + /* + * We're not done here; other threads can't allocate out of ps + * while purging, but they can still deallocate. Those + * deallocations could have meant more purging than what we + * planned ought to happen. We have to re-check now that we've + * reacquired the mutex again. + */ + } +} + +/* + * Does the metadata tracking associated with a page slab becoming empty. The + * psset doesn't hold empty pageslabs, but we do want address space reuse, so we + * track these pages outside the psset. */ static void hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { @@ -239,12 +289,6 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - /* - * We do this unconditionally, even for pages which were not originally - * hugified; it has the same effect. - */ - hpa_dehugify(ps); - malloc_mutex_lock(tsdn, &shard->grow_mtx); shard->nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); @@ -291,6 +335,11 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) if (err) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + /* + * We should arguably reset dirty state here, but this would + * require some sort of prepare + commit functionality that's a + * little much to deal with for now. + */ psset_insert(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -318,9 +367,26 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * on this page slab, but also operations any other alloc/dalloc * operations in this hpa shard. */ - hpa_hugify(ps); + bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); + /* + * Pretending we succeed when we actually failed is safe; trying + * to rolllback would be tricky, though. Eat the error. + */ + (void)err; + malloc_mutex_lock(tsdn, &shard->mtx); hpdata_hugify_end(ps); + if (hpa_should_purge(shard, ps)) { + /* + * There was a race in which the ps went from being + * almost full to having lots of free space while we + * hugified. Undo our operation, taking care to meet + * the precondition that the ps isn't in the psset. + */ + psset_remove(&shard->psset, ps); + hpa_purge(tsdn, shard, ps); + psset_insert(&shard->psset, ps); + } malloc_mutex_unlock(tsdn, &shard->mtx); } return edata; @@ -383,11 +449,28 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (err) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + edata_cache_small_put(tsdn, &shard->ecs, edata); shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + /* We'll do a fake purge; the pages weren't really touched. 
*/ + hpdata_purge_state_t purge_state; + void *purge_addr; + size_t purge_size; + hpdata_purge_begin(ps, &purge_state); + bool found_extent = hpdata_purge_next(ps, &purge_state, + &purge_addr, &purge_size); + assert(found_extent); + assert(purge_addr == addr); + assert(purge_size == size); + found_extent = hpdata_purge_next(ps, &purge_state, + &purge_addr, &purge_size); + assert(!found_extent); + hpdata_purge_end(ps, &purge_state); + hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } @@ -475,13 +558,66 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { /* Currently, all edatas come from pageslabs. */ assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); + /* + * Note that the shard mutex protects ps's metadata too; it wouldn't be + * correct to try to read most information out of it without the lock. + */ malloc_mutex_lock(tsdn, &shard->mtx); - /* Note that the shard mutex protects ps's metadata too. */ + /* + * Release the metadata early, to avoid having to remember to do it + * while we're also doing tricky purging logic. + */ + void *unreserve_addr = edata_addr_get(edata); + size_t unreserve_size = edata_size_get(edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + + /* + * We have three rules interacting here: + * - You can't update ps metadata while it's still in the psset. We + * enforce this because it's necessary for stats tracking and metadata + * management. + * - The ps must not be in the psset while purging. This is because we + * can't handle purge/alloc races. + * - Whoever removes the ps from the psset is the one to reinsert it (or + * to pass it to hpa_handle_ps_eviction upon emptying). This keeps + * responsibility tracking simple. + */ + if (hpdata_mid_purge_get(ps)) { + /* + * Another thread started purging, and so the ps is not in the + * psset and we can do our metadata update. The other thread is + * in charge of reinserting the ps, so we're done. + */ + assert(!hpdata_in_psset_get(ps)); + hpdata_unreserve(ps, unreserve_addr, unreserve_size); + malloc_mutex_unlock(tsdn, &shard->mtx); + return; + } + /* + * No other thread is purging, and the ps is non-empty, so it should be + * in the psset. + */ + assert(hpdata_in_psset_get(ps)); psset_remove(&shard->psset, ps); - hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + hpdata_unreserve(ps, unreserve_addr, unreserve_size); + if (!hpa_should_purge(shard, ps)) { + /* + * This should be the common case; no other thread is purging, + * and we won't purge either. + */ + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + return; + } - edata_cache_small_put(tsdn, &shard->ecs, edata); + /* It's our job to purge. */ + hpa_purge(tsdn, shard, ps); + + /* + * OK, the hpdata is as purged as we want it to be, and it's going back + * into the psset (if nonempty) or getting evicted (if empty). 
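
For a sense of scale behind the hpa_should_purge() checks used in this deallocation path: the 25% threshold is expressed in pages, so the absolute numbers depend on the platform's page and hugepage sizes. The short standalone program below works them out under the common assumption of 4 KiB pages and 2 MiB hugepages (so HUGEPAGE_PAGES == 512); it is purely illustrative and uses no jemalloc internals.

    #include <stdio.h>

    int main(void) {
        size_t hugepage_pages = (2u << 20) / (4u << 10);  /* 512 */
        size_t threshold = hugepage_pages * 25 / 100;     /* 128 pages */
        printf("a non-empty slab becomes a purge candidate once more than "
            "%zu pages (%zu KiB) are dirty but inactive\n",
            threshold, threshold * 4);
        /* An empty slab qualifies as soon as it has any dirty pages at all. */
        return 0;
    }
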
+ */ if (hpdata_empty(ps)) { malloc_mutex_unlock(tsdn, &shard->mtx); hpa_handle_ps_eviction(tsdn, shard, ps); diff --git a/src/hpdata.c b/src/hpdata.c index 29aecff..7881619 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,6 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; + hpdata->h_in_psset = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -36,6 +37,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); + assert(!hpdata_in_psset_get(hpdata)); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -116,6 +118,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) @@ -144,6 +147,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -181,6 +185,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); + assert(!hpdata->h_in_psset); /* Should have dehugified already (if necessary). */ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -210,6 +215,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -230,6 +236,7 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + assert(!hpdata_in_psset_get(hpdata)); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -242,6 +249,11 @@ hpdata_hugify_begin(hpdata_t *hpdata) { void hpdata_hugify_end(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + /* + * This is the exception to the "no metadata tweaks while in the psset" + * rule. + */ + /* assert(!hpdata_in_psset_get(hpdata)); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; @@ -251,30 +263,9 @@ hpdata_hugify_end(hpdata_t *hpdata) { void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - /* - * These asserts are morally right; for now, though, we have the "purge a - * hugepage only in its entirety, when it becomes empty", path sharing - * hpdata_dehugify with the new purge pathway coming in the next - * commit. - */ - /* + assert(!hpdata_in_psset_get(hpdata)); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); - */ hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } - -void -hpdata_purge(hpdata_t *hpdata) { - hpdata_assert_consistent(hpdata); - /* - * The hpdata must be empty; we don't (yet) support partial purges of - * hugepages. 
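
The commented-out assert in hpdata_hugify_end() above is the one deliberate exception to the "no metadata changes while the slab is in the psset" rule. The interleaving it is written for mirrors the hugify branch of hpa_try_alloc_no_grow(); the sketch below is illustrative only, with error handling omitted.

    malloc_mutex_lock(tsdn, &shard->mtx);
    hpdata_hugify_begin(ps);           /* ps is not in the psset yet */
    psset_insert(&shard->psset, ps);   /* other threads may now allocate from it */
    malloc_mutex_unlock(tsdn, &shard->mtx);

    /* Slow OS call, made without the lock held. */
    pages_huge(hpdata_addr_get(ps), HUGEPAGE);

    malloc_mutex_lock(tsdn, &shard->mtx);
    hpdata_hugify_end(ps);             /* allowed even though ps is back in the psset */
    malloc_mutex_unlock(tsdn, &shard->mtx);
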
- */ - assert(hpdata->h_nactive == 0); - fb_unset_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); - fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); - hpdata->h_ndirty = 0; - hpdata_assert_consistent(hpdata); -} diff --git a/src/psset.c b/src/psset.c index 9fcdac2..688cd62 100644 --- a/src/psset.c +++ b/src/psset.c @@ -92,6 +92,8 @@ void psset_insert(psset_t *psset, hpdata_t *ps) { assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); + assert(!hpdata_in_psset_get(ps)); + hpdata_in_psset_set(ps, true); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -116,6 +118,9 @@ psset_insert(psset_t *psset, hpdata_t *ps) { void psset_remove(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); + assert(hpdata_in_psset_get(ps)); + hpdata_in_psset_set(ps, false); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2fd9a36..aa4506f 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -169,6 +169,7 @@ TEST_BEGIN(test_hugify) { expect_false(hpdata_changing_state_get(&hpdata), ""); hpdata_hugify_begin(&hpdata); expect_true(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_end(&hpdata); expect_false(hpdata_changing_state_get(&hpdata), ""); -- cgit v0.12 From 746ea3de6f0c372aebb4d7d56172eb2614c83d2d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 15:35:38 -0800 Subject: HPA stats: Allow some derived stats. However, we put them in their own struct, to avoid the messiness that the arena has (mixing derived and non-derived stats in the arena_stats_t). --- include/jemalloc/internal/hpa.h | 22 +++++++++++++++------- src/ctl.c | 2 +- src/hpa.c | 19 +++++++++++++------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 217604e..8dc9b3c 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,12 +6,22 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; +struct hpa_shard_nonderived_stats_s { + /* + * The number of times we've purged a hugepage. Each eviction purges a + * single hugepage. + * + * Guarded by the grow mutex. + */ + uint64_t nevictions; +}; + /* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; - /* The stat version of the nevictions counter. */ - uint64_t nevictions; + hpa_shard_nonderived_stats_t nonderived_stats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -73,12 +83,10 @@ struct hpa_shard_s { emap_t *emap; /* - * The number of times we've purged a hugepage. Each eviction purges a - * single hugepage. - * - * Guarded by the grow mutex. + * Those stats which are copied directly into the CTL-centric hpa shard + * stats. 
*/ - uint64_t nevictions; + hpa_shard_nonderived_stats_t stats; }; /* diff --git a/src/ctl.c b/src/ctl.c index 324925d..7e30b75 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3506,7 +3506,7 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, - arenas_i(mib[2])->astats->hpastats.nevictions, uint64_t); + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index 9959454..d1a5431 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,7 +74,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; - shard->nevictions = 0; + shard->stats.nevictions = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -95,10 +95,17 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * only combines the stats from one stats objet to another. Hence the lack of * locking here. */ +static void +hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, + hpa_shard_nonderived_stats_t *src) { + dst->nevictions += src->nevictions; +} + void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); - dst->nevictions += src->nevictions; + hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, + &src->nonderived_stats); } void @@ -107,7 +114,7 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); - dst->nevictions += shard->nevictions; + hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -290,7 +297,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->grow_mtx); - shard->nevictions++; + shard->stats.nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -431,7 +438,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_lock(tsdn, &shard->mtx); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { - shard->nevictions++; + shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); @@ -452,7 +459,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { edata_cache_small_put(tsdn, &shard->ecs, edata); - shard->nevictions++; + shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); -- cgit v0.12 From b25ee5d88e07adcb3c085c19654039bb6b32dcf4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 16:09:50 -0800 Subject: HPA: Add purge stats. 
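
Once this change is in place, the new counters sit alongside the existing nevictions statistic and can be read through the usual mallctl() interface. The helper below is a hedged sketch of doing so from application code: it assumes a jemalloc built with statistics enabled, without a symbol prefix, and with the HPA in use; the arena index and the "epoch" refresh are the standard pattern rather than anything specific to this commit, and the statistic names follow the ctl.c entries added below.

    #include <inttypes.h>
    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    static void
    print_hpa_purge_stats(unsigned arena_ind) {
        /* Refresh jemalloc's stats snapshot. */
        uint64_t epoch = 1;
        mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch));

        const char *names[] = {"nevictions", "npurge_passes", "npurges",
            "nhugifies", "ndehugifies"};
        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
            char ctl[128];
            snprintf(ctl, sizeof(ctl), "stats.arenas.%u.hpa_shard.%s",
                arena_ind, names[i]);
            uint64_t val;
            size_t sz = sizeof(val);
            if (mallctl(ctl, &val, &sz, NULL, 0) == 0) {
                printf("%s: %" PRIu64 "\n", ctl, val);
            }
        }
    }
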
--- include/jemalloc/internal/hpa.h | 34 +++++++++++++++++++++++++++++++--- src/ctl.c | 19 ++++++++++++++++++- src/hpa.c | 15 +++++++++++++++ src/stats.c | 36 +++++++++++++++++++++++++++++++++--- 4 files changed, 97 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 8dc9b3c..bea88c3 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -9,12 +9,40 @@ typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* - * The number of times we've purged a hugepage. Each eviction purges a - * single hugepage. + * The number of times we've fully purged a hugepage and evicted it from + * the psset. * - * Guarded by the grow mutex. + * Guarded by grow_mtx. */ uint64_t nevictions; + + /* + * The number of times we've purged within a hugepage. + * + * Guarded by mtx. + */ + uint64_t npurge_passes; + /* + * The number of individual purge calls we perform (which should always + * be bigger than npurge_passes, since each pass purges at least one + * extent within a hugepage. + * + * Guarded by mtx. + */ + uint64_t npurges; + + /* + * The number of times we've hugified a pageslab. + * + * Guarded by mtx. + */ + uint64_t nhugifies; + /* + * The number of times we've dehugified a pageslab. + * + * Guarded by mtx. + */ + uint64_t ndehugifies; }; /* Completely derived; only used by CTL. */ diff --git a/src/ctl.c b/src/ctl.c index 7e30b75..8871fd1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -227,6 +227,10 @@ CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) +CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) +CTL_PROTO(stats_arenas_i_hpa_shard_npurges) +CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) +CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) @@ -695,7 +699,12 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { stats_arenas_i_hpa_shard_full_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, - {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)} + + {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)}, + {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, + {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, + {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -3507,6 +3516,14 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, + 
arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index d1a5431..5230f6b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,7 +74,12 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->stats.nevictions = 0; + shard->stats.npurge_passes = 0; + shard->stats.npurges = 0; + shard->stats.nhugifies = 0; + shard->stats.ndehugifies = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -99,6 +104,10 @@ static void hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { dst->nevictions += src->nevictions; + dst->npurge_passes += src->npurge_passes; + dst->npurges += src->npurges; + dst->nhugifies += src->nhugifies; + dst->ndehugifies += src->ndehugifies; } void @@ -237,6 +246,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* Do the metadata update bit while holding the lock. */ hpdata_purge_state_t purge_state; hpdata_purge_begin(ps, &purge_state); + shard->stats.npurge_passes++; /* * Dehugifying can only happen on the first loop iteration, @@ -247,6 +257,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { bool needs_dehugify = false; if (hpdata_huge_get(ps)) { needs_dehugify = true; + shard->stats.ndehugifies++; hpdata_dehugify(ps); } @@ -258,16 +269,19 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { } size_t total_purged = 0; + uint64_t purges_this_pass = 0; void *purge_addr; size_t purge_size; while (hpdata_purge_next(ps, &purge_state, &purge_addr, &purge_size)) { + purges_this_pass++; pages_purge_forced(purge_addr, purge_size); total_purged += purge_size; } /* Reacquire to finish our metadata update. 
*/ malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.npurges += purges_this_pass; hpdata_purge_end(ps, &purge_state); assert(total_purged <= HUGEPAGE); @@ -357,6 +371,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { hpdata_hugify_begin(ps); + shard->stats.nhugifies++; } psset_insert(&shard->psset, ps); diff --git a/src/stats.c b/src/stats.c index 86a2c01..1b51c8b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -791,6 +791,21 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_init(&row); uint64_t nevictions; + uint64_t npurge_passes; + uint64_t npurges; + uint64_t nhugifies; + uint64_t ndehugifies; + + CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", + i, &nevictions, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", + i, &npurge_passes, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", + i, &npurges, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", + i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", + i, &ndehugifies, uint64_t); size_t npageslabs_huge; size_t nactive_huge; @@ -800,9 +815,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t nactive_nonhuge; size_t ninactive_nonhuge; - CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", - i, &nevictions, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -825,17 +837,35 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_printf(emitter, "HPA shard stats:\n" " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + "\n" " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ninactive: %zu huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), + npurge_passes, rate_per_second(npurge_passes, uptime), + npurges, rate_per_second(npurges, uptime), + nhugifies, rate_per_second(nhugifies, uptime), + ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, &nevictions); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, + &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, + &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, + &nhugifies); + emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, + &ndehugifies); + emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); -- cgit v0.12 From 94cd9444c5eecdeea871f008a1e2d805d48dfe5d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:02:23 -0800 Subject: HPA: Some minor reformattings. 
--- include/jemalloc/internal/hpdata.h | 9 ++++----- src/hpa.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 66473d2..12a72a6 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -77,6 +77,10 @@ struct hpdata_s { fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; +TYPED_LIST(hpdata_list, hpdata_t, ql_link) +typedef ph(hpdata_t) hpdata_age_heap_t; +ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); + static inline void * hpdata_addr_get(const hpdata_t *hpdata) { return hpdata->h_address; @@ -188,11 +192,6 @@ hpdata_assert_consistent(hpdata_t *hpdata) { assert(hpdata_consistent(hpdata)); } -TYPED_LIST(hpdata_list, hpdata_t, ql_link) - -typedef ph(hpdata_t) hpdata_age_heap_t; -ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); - static inline bool hpdata_empty(hpdata_t *hpdata) { return hpdata->h_nactive == 0; diff --git a/src/hpa.c b/src/hpa.c index 5230f6b..4069c1e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -135,16 +135,20 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { } static bool -hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { +hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. */ - if (hpdata_changing_state_get(ps)) { + return hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; +} + +static bool +hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { + if (hpdata_changing_state_get(ps) || hpdata_huge_get(ps)) { return false; } - return !hpdata_huge_get(ps) - && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; + return hpa_good_hugification_candidate(shard, ps); } /* -- cgit v0.12 From 55e0f60ca1c154659b56ec90a85c8b53b580361e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:32:42 -0800 Subject: psset stats: Simplify handling. We can treat the huge and nonhuge cases uniformly using huge state as an array index. --- include/jemalloc/internal/psset.h | 21 ++++++++++------- src/ctl.c | 47 +++++++++++++++++++------------------ src/hpa.c | 19 +++++++-------- src/psset.c | 49 ++++++++++++++++----------------------- test/unit/psset.c | 22 +++++++++--------- 5 files changed, 76 insertions(+), 82 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7027cff..d818966 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -26,25 +26,28 @@ typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ - size_t npageslabs_huge; - size_t npageslabs_nonhuge; + size_t npageslabs; /* Of them, how many pages are active? */ - size_t nactive_huge; - size_t nactive_nonhuge; + size_t nactive; /* How many are inactive? */ - size_t ninactive_huge; - size_t ninactive_nonhuge; + size_t ninactive; }; -/* Used only by CTL; not actually stored here (i.e., all derived). */ typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { + + /* + * The second index is huge stats; nonfull_slabs[pszind][0] contains + * stats for the non-huge slabs in bucket pszind, while + * nonfull_slabs[pszind][1] contains stats for the huge slabs. + */ + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; + /* * Full slabs don't live in any edata heap. But we still track their * stats. 
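
With hugeness folded into an array index, code that consumes psset_stats_t can treat the huge and nonhuge cases uniformly instead of naming separate fields. The helper below is a hypothetical sketch (not part of this commit) that sums active pages across the layout declared in this change, in the same spirit as the hpa_assert_empty() loop further down.

    static size_t
    psset_stats_nactive_total(const psset_stats_t *stats) {
        size_t total = 0;
        for (int huge = 0; huge <= 1; huge++) {
            total += stats->full_slabs[huge].nactive;
            for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
                total += stats->nonfull_slabs[i][huge].nactive;
            }
        }
        return total;
    }
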
*/ - psset_bin_stats_t full_slabs; - psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES]; + psset_bin_stats_t full_slabs[2]; }; typedef struct psset_s psset_t; diff --git a/src/ctl.c b/src/ctl.c index 8871fd1..516add4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3525,45 +3525,46 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); +/* Full, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ninactive, size_t); + /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ninactive, size_t); -/* Full, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge, +/* Nonfull, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ninactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t); /* Nonfull, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ninactive, size_t); -/* Nonfull, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge, - size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge, - size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge, - size_t); static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/hpa.c b/src/hpa.c index 4069c1e..a206cff 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -662,12 +662,9 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { - assert(bin_stats->npageslabs_huge == 0); - assert(bin_stats->nactive_huge == 0); - assert(bin_stats->ninactive_huge == 0); - assert(bin_stats->npageslabs_nonhuge == 0); - assert(bin_stats->nactive_nonhuge == 0); - assert(bin_stats->ninactive_nonhuge == 0); + assert(bin_stats->npageslabs == 0); + assert(bin_stats->nactive == 0); + assert(bin_stats->ninactive == 0); } static void @@ -675,10 +672,12 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); hpdata_t *ps = psset_fit(psset, PAGE); assert(ps == NULL); - hpa_shard_assert_stats_empty(&psset->stats.full_slabs); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - hpa_shard_assert_stats_empty( - &psset->stats.nonfull_slabs[i]); + for (int huge = 0; huge <= 1; huge++) { + hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &psset->stats.nonfull_slabs[i][huge]); + } } } diff --git a/src/psset.c b/src/psset.c index 688cd62..a91653f 100644 --- a/src/psset.c +++ b/src/psset.c @@ -19,21 +19,20 @@ psset_init(psset_t *psset) { static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs_huge += src->npageslabs_huge; - dst->nactive_huge += src->nactive_huge; - dst->ninactive_huge += src->ninactive_huge; - - dst->npageslabs_nonhuge += src->npageslabs_nonhuge; - dst->nactive_nonhuge += src->nactive_nonhuge; - dst->ninactive_nonhuge += src->ninactive_nonhuge; + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; } void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { - psset_bin_stats_accum(&dst->full_slabs, &src->full_slabs); + psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); + psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum(&dst->nonfull_slabs[i], - &src->nonfull_slabs[i]); + psset_bin_stats_accum(&dst->nonfull_slabs[i][0], + &src->nonfull_slabs[i][0]); + psset_bin_stats_accum(&dst->nonfull_slabs[i][1], + &src->nonfull_slabs[i][1]); } } @@ -50,42 +49,34 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { JEMALLOC_ALWAYS_INLINE void psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { - size_t *npageslabs_dst = hpdata_huge_get(ps) - ? 
&binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; - size_t *nactive_dst = hpdata_huge_get(ps) - ? &binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = hpdata_huge_get(ps) - ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - - size_t nactive = hpdata_nactive_get(ps); - size_t ninactive = HUGEPAGE_PAGES - nactive; - size_t mul = insert ? (size_t)1 : (size_t)-1; - *npageslabs_dst += mul * 1; - *nactive_dst += mul * nactive; - *ninactive_dst += mul * ninactive; + size_t huge_idx = (size_t)hpdata_huge_get(ps); + binstats[huge_idx].npageslabs += mul * 1; + size_t nactive = hpdata_nactive_get(ps); + binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].ninactive += mul * (HUGEPAGE_PAGES - nactive); } static void psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); + psset_bin_stats_insert_remove(binstats, ps, true); } static void psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); + psset_bin_stats_insert_remove(binstats, ps, false); } static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); } void @@ -101,7 +92,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. 
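
The merged insert/remove path above keeps the existing trick of multiplying by a size_t "sign": because unsigned arithmetic is modular, adding mul * n with mul == (size_t)-1 is exactly the same as subtracting n. A small standalone illustration, with nothing jemalloc-specific in it:

    #include <assert.h>
    #include <stddef.h>

    int main(void) {
        size_t counter = 100;
        size_t n = 7;

        size_t mul = (size_t)-1;   /* the "remove" direction */
        counter += mul * n;        /* wraps around: same as counter -= n */
        assert(counter == 93);

        mul = 1;                   /* the "insert" direction */
        counter += mul * n;
        assert(counter == 100);
        return 0;
    }
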
*/ - psset_bin_stats_insert(&psset->stats.full_slabs, ps); + psset_bin_stats_insert(psset->stats.full_slabs, ps); return; } @@ -124,7 +115,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_remove(&psset->stats.full_slabs, ps); + psset_bin_stats_remove(psset->stats.full_slabs, ps); return; } diff --git a/test/unit/psset.c b/test/unit/psset.c index 6f35fa8..020a832 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -321,26 +321,26 @@ TEST_END static void stats_expect_empty(psset_bin_stats_t *stats) { - assert_zu_eq(0, stats->npageslabs_nonhuge, + assert_zu_eq(0, stats->npageslabs, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive_nonhuge, "Unexpected nonempty bin" + expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin" + expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" "Supposedly empty bin had positive ninactive"); } static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == HUGEPAGE_PAGES) { - expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, + expect_zu_eq(1, psset->stats.full_slabs[0].npageslabs, "Expected a full slab"); expect_zu_eq(HUGEPAGE_PAGES, - psset->stats.full_slabs.nactive_nonhuge, + psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, + expect_zu_eq(0, psset->stats.full_slabs[0].ninactive, "Should never have inactive pages in a full slab"); } else { - stats_expect_empty(&psset->stats.full_slabs); + stats_expect_empty(&psset->stats.full_slabs[0]); } size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; @@ -351,16 +351,16 @@ stats_expect(psset_t *psset, size_t nactive) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { assert_zu_eq(1, - psset->stats.nonfull_slabs[i].npageslabs_nonhuge, + psset->stats.nonfull_slabs[i][0].npageslabs, "Should have found a slab"); expect_zu_eq(nactive, - psset->stats.nonfull_slabs[i].nactive_nonhuge, + psset->stats.nonfull_slabs[i][0].nactive, "Mismatch in active pages"); expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i].ninactive_nonhuge, + psset->stats.nonfull_slabs[i][0].ninactive, "Mismatch in inactive pages"); } else { - stats_expect_empty(&psset->stats.nonfull_slabs[i]); + stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } } } -- cgit v0.12 From be0d7a53f3ca361d68f9a820157e9af49c989398 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:43:10 -0800 Subject: HPA: Don't track inactive pages. This is really only useful for human consumption. Correspondingly, emit it only in the human-readable stats, and let everybody else compute from the hugepage size and nactive. --- include/jemalloc/internal/psset.h | 2 -- src/ctl.c | 27 ++------------------------- src/hpa.c | 1 - src/psset.c | 2 -- src/stats.c | 23 +++++++---------------- test/unit/psset.c | 7 ------- 6 files changed, 9 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index d818966..3320d4e 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -29,8 +29,6 @@ struct psset_bin_stats_s { size_t npageslabs; /* Of them, how many pages are active? */ size_t nactive; - /* How many are inactive? 
*/ - size_t ninactive; }; typedef struct psset_stats_s psset_stats_t; diff --git a/src/ctl.c b/src/ctl.c index 516add4..aa87858 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -233,16 +233,12 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -659,14 +655,10 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, - {NAME("ninactive_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, - {NAME("ninactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)}, + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)} }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -674,14 +666,10 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, - {NAME("ninactive_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, - {NAME("ninactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)} + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -3531,8 +3519,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ninactive, size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, @@ -3540,8 +3526,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); -CTL_RO_CGEN(config_stats, 
stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ninactive, size_t); /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, @@ -3550,9 +3534,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_no CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ninactive, - size_t); /* Nonfull, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, @@ -3561,10 +3542,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_hu CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ninactive, - size_t); - static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/hpa.c b/src/hpa.c index a206cff..4397c9d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -664,7 +664,6 @@ static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->npageslabs == 0); assert(bin_stats->nactive == 0); - assert(bin_stats->ninactive == 0); } static void diff --git a/src/psset.c b/src/psset.c index a91653f..e8d847a 100644 --- a/src/psset.c +++ b/src/psset.c @@ -21,7 +21,6 @@ static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { dst->npageslabs += src->npageslabs; dst->nactive += src->nactive; - dst->ninactive += src->ninactive; } void @@ -54,7 +53,6 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, binstats[huge_idx].npageslabs += mul * 1; size_t nactive = hpdata_nactive_get(ps); binstats[huge_idx].nactive += mul * nactive; - binstats[huge_idx].ninactive += mul * (HUGEPAGE_PAGES - nactive); } static void diff --git a/src/stats.c b/src/stats.c index 1b51c8b..a8d3ffe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -819,15 +819,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge", - i, &ninactive_huge, size_t); + ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, &npageslabs_nonhuge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge", - i, &ninactive_nonhuge, size_t); + ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge; size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -875,10 +874,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nactive_huge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); - emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, - &ninactive_huge); - 
emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, - &ninactive_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) @@ -905,14 +900,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ninactive_huge", - &ninactive_huge, size_t); + ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES + - nactive_huge; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ninactive_nonhuge", - &ninactive_nonhuge, size_t); + ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge; bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); @@ -938,14 +933,10 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); - emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, - &ninactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); - emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, - &ninactive_huge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/psset.c b/test/unit/psset.c index 020a832..8801444 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -325,8 +325,6 @@ stats_expect_empty(psset_bin_stats_t *stats) { "Supposedly empty bin had positive npageslabs"); expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" - "Supposedly empty bin had positive ninactive"); } static void @@ -337,8 +335,6 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs[0].ninactive, - "Should never have inactive pages in a full slab"); } else { stats_expect_empty(&psset->stats.full_slabs[0]); } @@ -356,9 +352,6 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(nactive, psset->stats.nonfull_slabs[i][0].nactive, "Mismatch in active pages"); - expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i][0].ninactive, - "Mismatch in inactive pages"); } else { stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } -- cgit v0.12 From 68a1666e915382cec716247d3b5950a066ef0768 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:58:58 -0800 Subject: hpdata: Rename "dirty" to "touched". This matches the usage in the rest of the codebase. --- include/jemalloc/internal/hpdata.h | 25 +++++++++++++++---------- src/hpa.c | 2 +- src/hpdata.c | 24 ++++++++++++------------ test/unit/hpdata.c | 12 ++++++------ 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 12a72a6..f800158 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -67,14 +67,14 @@ struct hpdata_s { fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; /* - * Number of dirty pages, and a bitmap tracking them. 
This really means - * "dirty" from the OS's point of view; it includes both active and - * inactive pages that have been touched by the user. + * Number of dirty or active pages, and a bitmap tracking them. One + * way to think of this is as which pages are dirty from the OS's + * perspective. */ - size_t h_ndirty; + size_t h_ntouched; /* The dirty pages (using the same definition as above). */ - fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; TYPED_LIST(hpdata_list, hpdata_t, ql_link) @@ -149,8 +149,13 @@ hpdata_nactive_get(hpdata_t *hpdata) { } static inline size_t +hpdata_ntouched_get(hpdata_t *hpdata) { + return hpdata->h_ntouched; +} + +static inline size_t hpdata_ndirty_get(hpdata_t *hpdata) { - return hpdata->h_ndirty; + return hpdata->h_ntouched - hpdata->h_nactive; } static inline void @@ -174,14 +179,14 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata->h_nactive) { return false; } - if (fb_scount(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_ndirty) { + if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_ntouched) { return false; } - if (hpdata->h_ndirty < hpdata->h_nactive) { + if (hpdata->h_ntouched < hpdata->h_nactive) { return false; } - if (hpdata->h_huge && hpdata->h_ndirty != HUGEPAGE_PAGES) { + if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { return false; } return true; diff --git a/src/hpa.c b/src/hpa.c index 4397c9d..822e3ba 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -161,7 +161,7 @@ hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { if (hpdata_changing_state_get(ps)) { return false; } - size_t purgeable = hpdata_ndirty_get(ps) - hpdata_nactive_get(ps); + size_t purgeable = hpdata_ndirty_get(ps); return purgeable > HUGEPAGE_PAGES * 25 / 100 || (purgeable > 0 && hpdata_empty(ps)); } diff --git a/src/hpdata.c b/src/hpdata.c index 7881619..e2a0b37 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -28,8 +28,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); - hpdata->h_ndirty = 0; - fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->h_ntouched = 0; + fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); hpdata_assert_consistent(hpdata); } @@ -84,10 +84,10 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * We might be about to dirty some memory for the first time; update our * count if so. */ - size_t new_dirty = fb_ucount(hpdata->dirty_pages, HUGEPAGE_PAGES, + size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); - fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, result, npages); - hpdata->h_ndirty += new_dirty; + fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); + hpdata->h_ntouched += new_dirty; /* * We might have shrunk the longest free range. We have to keep @@ -167,10 +167,10 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { */ fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); fb_bit_and(purge_state->to_purge, purge_state->to_purge, - hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->touched_pages, HUGEPAGE_PAGES); /* We purge everything we can. 
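
The two fb_ calls in hpdata_purge_begin() above compute "touched and not active", i.e. the pages that are dirty from the OS's perspective but no longer handed out. The toy program below redoes that computation on an 8-page slab using a plain integer bitmap; real slabs have HUGEPAGE_PAGES entries and use the fb_* bitmap API, so this is illustration only.

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        /* Bit i set == page i is in that state. */
        uint8_t active  = 0x0F;   /* pages 0-3 currently allocated */
        uint8_t touched = 0x3F;   /* pages 0-5 have been dirtied at some point */

        /* to_purge = ~active & touched, as in hpdata_purge_begin(). */
        uint8_t to_purge = (uint8_t)(~active & touched);
        assert(to_purge == 0x30); /* pages 4 and 5 */

        /* Two purgeable pages, matching ntouched - nactive == 6 - 4. */
        return 0;
    }
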
*/ - assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount( + assert(hpdata->h_ntouched - hpdata->h_nactive == fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); @@ -225,10 +225,10 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_bit_not(purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); - fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages, + fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); - assert(hpdata->h_ndirty >= purge_state->npurged); - hpdata->h_ndirty -= purge_state->npurged; + assert(hpdata->h_ntouched >= purge_state->npurged); + hpdata->h_ntouched -= purge_state->npurged; hpdata_assert_consistent(hpdata); } @@ -241,8 +241,8 @@ hpdata_hugify_begin(hpdata_t *hpdata) { assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; hpdata->h_huge = true; - fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); - hpdata->h_ndirty = HUGEPAGE_PAGES; + fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ntouched = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index aa4506f..688911a 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -65,7 +65,7 @@ TEST_BEGIN(test_purge_simple) { /* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. */ hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); expect_false(hpdata_changing_state_get(&hpdata), ""); @@ -93,7 +93,7 @@ TEST_BEGIN(test_purge_simple) { hpdata_purge_end(&hpdata, &purge_state); expect_false(hpdata_changing_state_get(&hpdata), ""); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -118,7 +118,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), HUGEPAGE_PAGES / 4 * PAGE); - expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); hpdata_purge_state_t purge_state; hpdata_purge_begin(&hpdata, &purge_state); @@ -153,7 +153,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_purge_end(&hpdata, &purge_state); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -164,7 +164,7 @@ TEST_BEGIN(test_hugify) { void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); - expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ntouched_get(&hpdata), ""); expect_false(hpdata_changing_state_get(&hpdata), ""); hpdata_hugify_begin(&hpdata); @@ -174,7 +174,7 @@ TEST_BEGIN(test_hugify) { expect_false(hpdata_changing_state_get(&hpdata), ""); /* Hugeifying should have increased the dirty page count. */ - expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES, hpdata_ntouched_get(&hpdata), ""); } TEST_END -- cgit v0.12 From d3e5ea03c5660ba46b6efcc10ad0b804140e2690 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 19:15:54 -0800 Subject: HPA: Track dirty stats. 
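
In terms of the rename just above, the per-slab quantity being aggregated here is hpdata_ndirty_get() == ntouched - nactive: memory the OS still considers resident but that is not currently allocated out of the slab. As a purely illustrative example, assuming 2 MiB hugepages of 4 KiB pages (512 pages per slab), a slab with 400 touched pages and 300 active pages contributes 100 pages (400 KiB) to its bin's new ndirty counter; the psset then folds these per-slab values into its per-bin counters (see psset_bin_stats_accum() in the diff below).
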
--- include/jemalloc/internal/psset.h | 2 ++ src/ctl.c | 47 +++++++++++++++++++++++++++++---------- src/psset.c | 5 +++-- src/stats.c | 34 ++++++++++++++++++++++++---- 4 files changed, 70 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3320d4e..fef0468 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -29,6 +29,8 @@ struct psset_bin_stats_s { size_t npageslabs; /* Of them, how many pages are active? */ size_t nactive; + /* And how many are dirty? */ + size_t ndirty; }; typedef struct psset_stats_s psset_stats_t; diff --git a/src/ctl.c b/src/ctl.c index aa87858..80fb90e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -231,14 +231,18 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -651,25 +655,33 @@ MUTEX_PROF_ARENA_MUTEXES }; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, {NAME("npageslabs_huge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)} + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, {NAME("npageslabs_huge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, - {NAME("nactive_nonhuge"), - 
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)} + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -3519,6 +3531,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, @@ -3526,6 +3540,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); + /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, @@ -3534,6 +3551,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_no CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ndirty, + size_t); /* Nonfull, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, @@ -3542,6 +3562,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_hu CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ndirty, + size_t); static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/psset.c b/src/psset.c index e8d847a..a09913c 100644 --- a/src/psset.c +++ b/src/psset.c @@ -21,6 +21,7 @@ static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { dst->npageslabs += src->npageslabs; dst->nactive += src->nactive; + dst->ndirty += src->ndirty; } void @@ -51,8 +52,8 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, size_t mul = insert ? 
(size_t)1 : (size_t)-1; size_t huge_idx = (size_t)hpdata_huge_get(ps); binstats[huge_idx].npageslabs += mul * 1; - size_t nactive = hpdata_nactive_get(ps); - binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); + binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); } static void diff --git a/src/stats.c b/src/stats.c index a8d3ffe..ea0be98 100644 --- a/src/stats.c +++ b/src/stats.c @@ -810,16 +810,20 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs_huge; size_t nactive_huge; size_t ninactive_huge; + size_t ndirty_huge; size_t npageslabs_nonhuge; size_t nactive_nonhuge; size_t ninactive_nonhuge; + size_t ndirty_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", + i, &ndirty_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, &npageslabs_nonhuge, size_t); @@ -827,6 +831,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &nactive_nonhuge, size_t); ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge; + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -844,7 +850,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" - " ninactive: %zu huge, %zu nonhuge \n", + " ninactive: %zu huge, %zu nonhuge \n" + " ndirty: %zu huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), @@ -852,7 +859,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, - ninactive_huge, ninactive_nonhuge); + ninactive_huge, ninactive_nonhuge, + ndirty_huge, ndirty_nonhuge); + emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, &nevictions); @@ -868,12 +877,16 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); - emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, - &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) @@ -881,9 +894,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, npageslabs_huge, NULL, right, 16, size) COL_HDR(row, nactive_huge, NULL, right, 16, size) COL_HDR(row, 
ninactive_huge, NULL, right, 16, size) + COL_HDR(row, ndirty_huge, NULL, right, 16, size) COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) + COL_HDR(row, ndirty_nonhuge, NULL, right, 20, size) size_t stats_arenas_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); @@ -900,12 +915,17 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", + &ndirty_huge, size_t); ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", + &ndirty_nonhuge, size_t); ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge; @@ -921,9 +941,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_npageslabs_huge.size_val = npageslabs_huge; col_nactive_huge.size_val = nactive_huge; col_ninactive_huge.size_val = ninactive_huge; + col_ndirty_huge.size_val = ndirty_huge; col_npageslabs_nonhuge.size_val = npageslabs_nonhuge; col_nactive_nonhuge.size_val = nactive_nonhuge; col_ninactive_nonhuge.size_val = ninactive_nonhuge; + col_ndirty_nonhuge.size_val = ndirty_nonhuge; if (!in_gap) { emitter_table_row(emitter, &row); } @@ -933,10 +955,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, + &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ -- cgit v0.12 From 061cabb7122d1fd63b8bfbe980a1fb1dcf3033f4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 19:35:21 -0800 Subject: HPA stats: report retained instead of inactive. This more closely maps to the PAC. 
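
As a sketch of the accounting change (not part of the patch; sketch_nretained and pages_per_slab are illustrative names, with pages_per_slab standing in for HUGEPAGE_PAGES): every page in a slab is either active, dirty, or retained, so the retained count falls out by subtraction, and huge slabs report zero retained because they are kept fully touched.

    #include <stddef.h>

    /*
     * Retained pages are the ones that are neither backing allocations
     * (active) nor touched-but-unused (dirty).
     */
    static size_t
    sketch_nretained(size_t npageslabs, size_t pages_per_slab,
        size_t nactive, size_t ndirty) {
        return npageslabs * pages_per_slab - nactive - ndirty;
    }
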
--- src/stats.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/stats.c b/src/stats.c index ea0be98..355921c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -809,19 +809,17 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs_huge; size_t nactive_huge; - size_t ninactive_huge; size_t ndirty_huge; size_t npageslabs_nonhuge; size_t nactive_nonhuge; - size_t ninactive_nonhuge; size_t ndirty_nonhuge; + size_t nretained_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); - ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", i, &ndirty_huge, size_t); @@ -829,10 +827,10 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &npageslabs_nonhuge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, &nactive_nonhuge, size_t); - ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - - nactive_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", i, &ndirty_nonhuge, size_t); + nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -850,8 +848,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" - " ninactive: %zu huge, %zu nonhuge \n" - " ndirty: %zu huge, %zu nonhuge \n", + " ndirty: %zu huge, %zu nonhuge \n" + " nretained: 0 huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), @@ -859,8 +857,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, - ninactive_huge, ninactive_nonhuge, - ndirty_huge, ndirty_nonhuge); + ndirty_huge, ndirty_nonhuge, + nretained_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, @@ -893,12 +891,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) COL_HDR(row, nactive_huge, NULL, right, 16, size) - COL_HDR(row, ninactive_huge, NULL, right, 16, size) COL_HDR(row, ndirty_huge, NULL, right, 16, size) COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) - COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ndirty_nonhuge, NULL, right, 20, size) + COL_HDR(row, nretained_nonhuge, NULL, right, 20, size) size_t stats_arenas_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); @@ -917,8 +914,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nactive_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", &ndirty_huge, size_t); - ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - - nactive_huge; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); @@ -926,8 +921,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, 
uint64_t uptime) {
 		    &nactive_nonhuge, size_t);
 		CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge",
 		    &ndirty_nonhuge, size_t);
-		ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES -
-		    nactive_nonhuge;
+		nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES
+		    - nactive_nonhuge - ndirty_nonhuge;
 
 		bool in_gap_prev = in_gap;
 		in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0);
@@ -940,12 +935,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) {
 		col_ind.size_val = j;
 		col_npageslabs_huge.size_val = npageslabs_huge;
 		col_nactive_huge.size_val = nactive_huge;
-		col_ninactive_huge.size_val = ninactive_huge;
 		col_ndirty_huge.size_val = ndirty_huge;
 		col_npageslabs_nonhuge.size_val = npageslabs_nonhuge;
 		col_nactive_nonhuge.size_val = nactive_nonhuge;
-		col_ninactive_nonhuge.size_val = ninactive_nonhuge;
 		col_ndirty_nonhuge.size_val = ndirty_nonhuge;
+		col_nretained_nonhuge.size_val = nretained_nonhuge;
 		if (!in_gap) {
 			emitter_table_row(emitter, &row);
 		}
-- 
cgit v0.12


From 99fc0717e653277c3d7fe77fe84316ad47381936 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Sat, 5 Dec 2020 15:58:31 -0800
Subject: psset: Reconceptualize insertion/removal.

Really, this isn't a functional change, just a naming change. We start
thinking of pageslabs as being always in the psset. What we used to think of
as removal is now thought of as being in the psset, but in the process of
being updated (and therefore, unavailable for serving new allocations).

This is in preparation for subsequent changes to support deferred purging;
allocations will still be in the psset for the purposes of choosing when to
purge, but not for purposes of allocation/deallocation.
---
 include/jemalloc/internal/hpdata.h | 18 +++++++++++------
 include/jemalloc/internal/psset.h  | 10 +++++++---
 src/hpa.c                          | 35 ++++++++++++++++++++-------------
 src/hpdata.c                       | 23 +++++++++++-----------
 src/psset.c                        | 40 +++++++++++++++++++-------------------
 test/unit/hpdata.c                 |  6 ++++++
 test/unit/psset.c                  | 33 +++++++++++++++++++------------
 7 files changed, 100 insertions(+), 65 deletions(-)

diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h
index f800158..2e2e1d8 100644
--- a/include/jemalloc/internal/hpdata.h
+++ b/include/jemalloc/internal/hpdata.h
@@ -44,8 +44,13 @@ struct hpdata_s {
 	bool h_mid_purge;
 	bool h_mid_hugify;
 
-	/* Whether or not the hpdata is a the psset. */
-	bool h_in_psset;
+	/*
+	 * Whether or not the hpdata is being updated in the psset (i.e. if
+	 * there has been a psset_update_begin call issued without a matching
+	 * psset_update_end call). Eventually this will expand to other types
+	 * of updates.
+	 */
+	bool h_updating;
 
 	union {
 		/* When nonempty, used by the psset bins.
*/ @@ -123,13 +128,14 @@ hpdata_mid_hugify_get(const hpdata_t *hpdata) { } static inline bool -hpdata_in_psset_get(const hpdata_t *hpdata) { - return hpdata->h_in_psset; +hpdata_updating_get(const hpdata_t *hpdata) { + return hpdata->h_updating; } static inline void -hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { - hpdata->h_in_psset = in_psset; +hpdata_updating_set(hpdata_t *hpdata, bool updating) { + assert(updating != hpdata->h_updating); + hpdata->h_updating = updating; } static inline size_t diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index fef0468..a7c9a8b 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -64,10 +64,14 @@ struct psset_s { void psset_init(psset_t *psset); void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); -void psset_insert(psset_t *psset, hpdata_t *ps); -void psset_remove(psset_t *psset, hpdata_t *ps); +/* + * Begin or end updating the given pageslab's metadata. While the pageslab is + * being updated, it won't be returned from psset_fit calls. + */ +void psset_update_begin(psset_t *psset, hpdata_t *ps); +void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ -hpdata_t *psset_fit(psset_t *psset, size_t size); +hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/hpa.c b/src/hpa.c index 822e3ba..6a4f2a6 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -333,14 +333,14 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } assert(edata_arena_ind_get(edata) == shard->ind); - hpdata_t *ps = psset_fit(&shard->psset, size); + hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } - psset_remove(&shard->psset, ps); + psset_update_begin(&shard->psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -365,7 +365,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * require some sort of prepare + commit functionality that's a * little much to deal with for now. */ - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; @@ -377,7 +377,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) hpdata_hugify_begin(ps); shard->stats.nhugifies++; } - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { @@ -409,9 +409,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * hugified. Undo our operation, taking care to meet * the precondition that the ps isn't in the psset. */ - psset_remove(&shard->psset, ps); + psset_update_begin(&shard->psset, ps); hpa_purge(tsdn, shard, ps); - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); } malloc_mutex_unlock(tsdn, &shard->mtx); } @@ -455,6 +455,15 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); + /* + * This will go away soon. The psset doesn't draw a distinction between + * pageslab removal and updating. 
If this is a new pageslab, we pretend + * that it's an old one that's been getting updated. + */ + if (!hpdata_updating_get(ps)) { + hpdata_updating_set(ps, true); + } + edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { shard->stats.nevictions++; @@ -500,7 +509,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); @@ -615,7 +624,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * psset and we can do our metadata update. The other thread is * in charge of reinserting the ps, so we're done. */ - assert(!hpdata_in_psset_get(ps)); + assert(hpdata_updating_get(ps)); hpdata_unreserve(ps, unreserve_addr, unreserve_size); malloc_mutex_unlock(tsdn, &shard->mtx); return; @@ -624,15 +633,15 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * No other thread is purging, and the ps is non-empty, so it should be * in the psset. */ - assert(hpdata_in_psset_get(ps)); - psset_remove(&shard->psset, ps); + assert(!hpdata_updating_get(ps)); + psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); if (!hpa_should_purge(shard, ps)) { /* * This should be the common case; no other thread is purging, * and we won't purge either. */ - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); return; } @@ -648,7 +657,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { malloc_mutex_unlock(tsdn, &shard->mtx); hpa_handle_ps_eviction(tsdn, shard, ps); } else { - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -669,7 +678,7 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - hpdata_t *ps = psset_fit(psset, PAGE); + hpdata_t *ps = psset_pick_alloc(psset, PAGE); assert(ps == NULL); for (int huge = 0; huge <= 1; huge++) { hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); diff --git a/src/hpdata.c b/src/hpdata.c index e2a0b37..0af7da0 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,7 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; - hpdata->h_in_psset = false; + hpdata->h_updating = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -37,7 +37,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -118,7 +118,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) @@ -147,7 +147,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void 
hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -185,7 +185,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); /* Should have dehugified already (if necessary). */ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -215,7 +215,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -236,7 +236,7 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -250,10 +250,10 @@ void hpdata_hugify_end(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); /* - * This is the exception to the "no metadata tweaks while in the psset" - * rule. + * This is the exception to the "no-metadata updates without informing + * the psset first" rule; this assert would be incorrect. */ - /* assert(!hpdata_in_psset_get(hpdata)); */ + /* assert(hpdata->h_updating); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; @@ -263,7 +263,8 @@ hpdata_hugify_end(hpdata_t *hpdata) { void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); + assert(hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_huge = false; diff --git a/src/psset.c b/src/psset.c index a09913c..2256460 100644 --- a/src/psset.c +++ b/src/psset.c @@ -79,56 +79,56 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { } void -psset_insert(psset_t *psset, hpdata_t *ps) { - assert(!hpdata_empty(ps)); +psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); - assert(!hpdata_in_psset_get(ps)); - hpdata_in_psset_set(ps, true); + assert(!hpdata_updating_get(ps)); + hpdata_updating_set(ps, true); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { - /* - * We don't ned to track full slabs; just pretend to for stats - * purposes. See the comment at psset_bin_stats_adjust. 
- */ - psset_bin_stats_insert(psset->stats.full_slabs, ps); + psset_bin_stats_remove(psset->stats.full_slabs, ps); return; } pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); + psset_hpdata_heap_remove(psset, pind, ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - psset_hpdata_heap_insert(psset, pind, ps); } void -psset_remove(psset_t *psset, hpdata_t *ps) { +psset_update_end(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); - assert(hpdata_in_psset_get(ps)); - hpdata_in_psset_set(ps, false); - + assert(hpdata_updating_get(ps)); + hpdata_updating_set(ps, false); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); + /* + * We don't ned to track full slabs; just pretend to for stats + * purposes. See the comment at psset_bin_stats_adjust. + */ + psset_bin_stats_insert(psset->stats.full_slabs, ps); return; } pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); - psset_hpdata_heap_remove(psset, pind, ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } + psset_hpdata_heap_insert(psset, pind, ps); } hpdata_t * -psset_fit(psset_t *psset, size_t size) { +psset_pick_alloc(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 688911a..cf7b89f 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -7,6 +7,8 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); + /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { expect_true(hpdata_consistent(&hpdata), ""); @@ -59,6 +61,8 @@ TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -107,6 +111,7 @@ TEST_END TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); /* Allocate the first 3/4 of the pages. */ void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); @@ -160,6 +165,7 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/psset.c b/test/unit/psset.c index 8801444..2043e4e 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -19,41 +19,50 @@ static void test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); + + /* + * As in hpa.c; pretend that the ps is already in the psset and just + * being updated, until we implement true insert/removal support. 
+ */ + if (!hpdata_updating_get(ps)) { + hpdata_updating_set(ps, true); + } + void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - psset_insert(psset, ps); + psset_update_end(psset, ps); } static bool test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_fit(psset, size); + hpdata_t *ps = psset_pick_alloc(psset, size); if (ps == NULL) { return true; } - psset_remove(psset, ps); + psset_update_begin(psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - psset_insert(psset, ps); + psset_update_end(psset, ps); return false; } static hpdata_t * test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); - psset_remove(psset, ps); + psset_update_begin(psset, ps); hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); if (hpdata_empty(ps)) { return ps; } else { - psset_insert(psset, ps); + psset_update_end(psset, ps); return NULL; } } @@ -390,9 +399,9 @@ TEST_BEGIN(test_stats) { test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); stats_expect(&psset, 1); - psset_remove(&psset, &pageslab); + psset_update_begin(&psset, &pageslab); stats_expect(&psset, 0); - psset_insert(&psset, &pageslab); + psset_update_end(&psset, &pageslab); stats_expect(&psset, 1); } TEST_END @@ -490,7 +499,7 @@ TEST_BEGIN(test_insert_remove) { worse_alloc); /* Remove better; should still be able to alloc from worse. */ - psset_remove(&psset, &pageslab); + psset_update_begin(&psset, &pageslab); err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); @@ -504,7 +513,7 @@ TEST_BEGIN(test_insert_remove) { */ ps = test_psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); - psset_insert(&psset, &pageslab); + psset_update_end(&psset, &pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), @@ -514,8 +523,8 @@ TEST_BEGIN(test_insert_remove) { */ ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); - psset_remove(&psset, &pageslab); - psset_remove(&psset, &worse_pageslab); + psset_update_begin(&psset, &pageslab); + psset_update_begin(&psset, &worse_pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } -- cgit v0.12 From bf64557ed66897b6833875542a6674652e640653 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 17:42:04 -0800 Subject: Move empty slab tracking to the psset. We're moving towards a world in which purging decisions are less rigidly enforced at a single-hugepage level. In that world, it makes sense to keep around some hpdatas which are not completely purged, in which case we'll need to track them. 
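
A minimal caller-side sketch of the resulting flow, assuming jemalloc's internal psset/hpdata/edata headers from this series (sketch_dalloc is an illustrative name; its body mirrors the updated test_psset_dalloc helper in the diff below):

    /* Deallocate an edata's pages and report whether its slab went empty. */
    static hpdata_t *
    sketch_dalloc(psset_t *psset, edata_t *edata) {
        hpdata_t *ps = edata_ps_get(edata);
        /* Hide the slab from allocation picks while its metadata changes. */
        psset_update_begin(psset, ps);
        hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata));
        /* Re-expose it with up-to-date stats and heap position. */
        psset_update_end(psset, ps);
        if (hpdata_empty(ps)) {
            /* Empty slabs now stay tracked until explicitly removed. */
            psset_remove(psset, ps);
            return ps;
        }
        return NULL;
    }
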
--- include/jemalloc/internal/hpa.h | 17 ----- include/jemalloc/internal/hpdata.h | 25 +++++++- include/jemalloc/internal/psset.h | 11 +++- src/ctl.c | 4 -- src/hpa.c | 94 ++++++++------------------- src/hpdata.c | 25 ++++++-- src/psset.c | 128 ++++++++++++++++++++++++++----------- src/stats.c | 7 -- test/unit/hpdata.c | 6 -- test/unit/psset.c | 25 +++++--- 10 files changed, 183 insertions(+), 159 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index bea88c3..f62c327 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -9,14 +9,6 @@ typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* - * The number of times we've fully purged a hugepage and evicted it from - * the psset. - * - * Guarded by grow_mtx. - */ - uint64_t nevictions; - - /* * The number of times we've purged within a hugepage. * * Guarded by mtx. @@ -81,15 +73,6 @@ struct hpa_shard_s { size_t alloc_max; /* - * Slabs currently purged away. They are hugepage-sized and - * hugepage-aligned, but have had pages_nohuge and pages_purge_forced - * called on them. - * - * Guarded by grow_mtx. - */ - hpdata_list_t unused_slabs; - - /* * How many grow operations have occurred. * * Guarded by grow_mtx. diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 2e2e1d8..393ed27 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -52,14 +52,17 @@ struct hpdata_s { */ bool h_updating; + /* Whether or not the hpdata is in a psset. */ + bool h_in_psset; + union { - /* When nonempty, used by the psset bins. */ + /* When nonempty (and also nonfull), used by the psset bins. */ phn(hpdata_t) ph_link; /* * When empty (or not corresponding to any hugepage), list * linkage. */ - ql_elm(hpdata_t) ql_link; + ql_elm(hpdata_t) ql_link_empty; }; /* The length of the largest contiguous sequence of inactive pages. 
*/ @@ -82,7 +85,7 @@ struct hpdata_s { fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; -TYPED_LIST(hpdata_list, hpdata_t, ql_link) +TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); @@ -138,6 +141,17 @@ hpdata_updating_set(hpdata_t *hpdata, bool updating) { hpdata->h_updating = updating; } +static inline bool +hpdata_in_psset_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset; +} + +static inline void +hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { + assert(in_psset != hpdata->h_in_psset); + hpdata->h_in_psset = in_psset; +} + static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -208,6 +222,11 @@ hpdata_empty(hpdata_t *hpdata) { return hpdata->h_nactive == 0; } +static inline bool +hpdata_full(hpdata_t *hpdata) { + return hpdata->h_nactive == HUGEPAGE_PAGES; +} + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); /* diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index a7c9a8b..b220609 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -35,7 +35,6 @@ struct psset_bin_stats_s { typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { - /* * The second index is huge stats; nonfull_slabs[pszind][0] contains * stats for the non-huge slabs in bucket pszind, while @@ -44,10 +43,13 @@ struct psset_stats_s { psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; /* - * Full slabs don't live in any edata heap. But we still track their + * Full slabs don't live in any edata heap, but we still track their * stats. */ psset_bin_stats_t full_slabs[2]; + + /* Empty slabs are similar. */ + psset_bin_stats_t empty_slabs[2]; }; typedef struct psset_s psset_t; @@ -59,6 +61,8 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; + /* Slabs with no active allocations. */ + hpdata_empty_list_t empty_slabs; }; void psset_init(psset_t *psset); @@ -74,4 +78,7 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. 
*/ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); +void psset_insert(psset_t *psset, hpdata_t *ps); +void psset_remove(psset_t *psset, hpdata_t *ps); + #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/ctl.c b/src/ctl.c index 80fb90e..3cec637 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -226,7 +226,6 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) -CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) @@ -700,7 +699,6 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, - {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)}, {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, @@ -3514,8 +3512,6 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, - arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, diff --git a/src/hpa.c b/src/hpa.c index 6a4f2a6..8f4642c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -68,14 +68,12 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->alloc_max = alloc_max; - hpdata_list_init(&shard->unused_slabs); shard->age_counter = 0; shard->eden = NULL; shard->eden_len = 0; shard->ind = ind; shard->emap = emap; - shard->stats.nevictions = 0; shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; @@ -103,7 +101,6 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, static void hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { - dst->nevictions += src->nevictions; dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; @@ -171,15 +168,6 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); hpdata_t *ps = NULL; - /* Is there address space waiting for reuse? */ - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - ps = hpdata_list_first(&shard->unused_slabs); - if (ps != NULL) { - hpdata_list_remove(&shard->unused_slabs, ps); - hpdata_age_set(ps, shard->age_counter++); - return ps; - } - /* Is eden a perfect fit? */ if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { ps = hpa_alloc_ps(tsdn, shard); @@ -300,26 +288,6 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { } } -/* - * Does the metadata tracking associated with a page slab becoming empty. The - * psset doesn't hold empty pageslabs, but we do want address space reuse, so we - * track these pages outside the psset. - */ -static void -hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { - /* - * We do relatively expensive system calls. The ps was evicted, so no - * one should touch it while we're also touching it. 
- */ - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - - malloc_mutex_lock(tsdn, &shard->grow_mtx); - shard->stats.nevictions++; - hpdata_list_prepend(&shard->unused_slabs, ps); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); -} - static edata_t * hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { bool err; @@ -341,6 +309,18 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } psset_update_begin(&shard->psset, ps); + + if (hpdata_empty(ps)) { + /* + * If the pageslab used to be empty, treat it as though it's + * brand new for fragmentation-avoidance purposes; what we're + * trying to approximate is the age of the allocations *in* that + * pageslab, and the allocations in the new pageslab are + * definitionally the youngest in this hpa shard. + */ + hpdata_age_set(ps, shard->age_counter++); + } + void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -453,26 +433,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { return NULL; } - /* We got the new edata; allocate from it. */ + /* We got the pageslab; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); - /* - * This will go away soon. The psset doesn't draw a distinction between - * pageslab removal and updating. If this is a new pageslab, we pretend - * that it's an old one that's been getting updated. - */ - if (!hpdata_updating_get(ps)) { - hpdata_updating_set(ps, true); - } + + psset_insert(&shard->psset, ps); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { - shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } + psset_update_begin(&shard->psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -487,10 +461,6 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { edata_cache_small_put(tsdn, &shard->ecs, edata); - shard->stats.nevictions++; - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - /* We'll do a fake purge; the pages weren't really touched. */ hpdata_purge_state_t purge_state; void *purge_addr; @@ -506,7 +476,9 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { assert(!found_extent); hpdata_purge_end(ps, &purge_state); - hpa_handle_ps_eviction(tsdn, shard, ps); + psset_update_end(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } psset_update_end(&shard->psset, ps); @@ -614,9 +586,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * management. * - The ps must not be in the psset while purging. This is because we * can't handle purge/alloc races. - * - Whoever removes the ps from the psset is the one to reinsert it (or - * to pass it to hpa_handle_ps_eviction upon emptying). This keeps - * responsibility tracking simple. + * - Whoever removes the ps from the psset is the one to reinsert it. */ if (hpdata_mid_purge_get(ps)) { /* @@ -649,17 +619,9 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { /* It's our job to purge. 
*/ hpa_purge(tsdn, shard, ps); - /* - * OK, the hpdata is as purged as we want it to be, and it's going back - * into the psset (if nonempty) or getting evicted (if empty). - */ - if (hpdata_empty(ps)) { - malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_handle_ps_eviction(tsdn, shard, ps); - } else { - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - } + psset_update_end(&shard->psset, ps); + + malloc_mutex_unlock(tsdn, &shard->mtx); } void @@ -678,8 +640,6 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - hpdata_t *ps = psset_pick_alloc(psset, PAGE); - assert(ps == NULL); for (int huge = 0; huge <= 1; huge++) { hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { @@ -703,8 +663,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } hpdata_t *ps; - while ((ps = hpdata_list_first(&shard->unused_slabs)) != NULL) { - hpdata_list_remove(&shard->unused_slabs, ps); + while ((ps = psset_pick_alloc(&shard->psset, PAGE)) != NULL) { + /* There should be no allocations anywhere. */ + assert(hpdata_empty(ps)); + psset_remove(&shard->psset, ps); pages_unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpdata.c b/src/hpdata.c index 0af7da0..0cfeeed 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -25,6 +25,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; hpdata->h_updating = false; + hpdata->h_in_psset = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -37,7 +38,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* + * This is a metadata change; the hpdata should therefore either not be + * in the psset, or should have explicitly marked itself as being + * mid-update. + */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -118,7 +124,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) @@ -147,7 +154,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -185,7 +193,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); /* Should have dehugified already (if necessary). 
*/ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -215,7 +224,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -236,7 +246,8 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -253,7 +264,7 @@ hpdata_hugify_end(hpdata_t *hpdata) { * This is the exception to the "no-metadata updates without informing * the psset first" rule; this assert would be incorrect. */ - /* assert(hpdata->h_updating); */ + /* assert(!hpdata->h_in_psset || hpdata->h_updating); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; diff --git a/src/psset.c b/src/psset.c index 2256460..8997102 100644 --- a/src/psset.c +++ b/src/psset.c @@ -15,6 +15,7 @@ psset_init(psset_t *psset) { } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); + hpdata_empty_list_init(&psset->empty_slabs); } static void @@ -28,6 +29,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); + psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]); + psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { psset_bin_stats_accum(&dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); @@ -69,71 +72,104 @@ psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } hpdata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); } -void -psset_update_begin(psset_t *psset, hpdata_t *ps) { - hpdata_assert_consistent(ps); - assert(!hpdata_updating_get(ps)); - hpdata_updating_set(ps, true); +/* + * Insert ps into the data structures we use to track allocation stats and pick + * the pageslabs for new allocations. + * + * In particular, this does *not* remove ps from any hugification / purging + * queues it may be in. + */ +static void +psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_insert(psset->stats.empty_slabs, ps); + /* + * This prepend, paired with popping the head in psset_fit, + * means we implement LIFO ordering for the empty slabs set, + * which seems reasonable. 
+ */ + hpdata_empty_list_prepend(&psset->empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_insert(psset->stats.full_slabs, ps); + /* + * We don't need to keep track of the full slabs; we're never + * going to return them from a psset_pick_alloc call. + */ + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); - size_t longest_free_range = hpdata_longest_free_range_get(ps); + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); - if (longest_free_range == 0) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); - return; + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + psset_hpdata_heap_insert(psset, pind, ps); } +} - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - psset_hpdata_heap_remove(psset, pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); +/* Remove ps from those collections. */ +static void +psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_remove(psset->stats.empty_slabs, ps); + hpdata_empty_list_remove(&psset->empty_slabs, ps); + } else if (hpdata_full(ps)) { + /* + * We don't need to maintain an explicit container of full + * pageslabs anywhere, but we do have to update stats. + */ + psset_bin_stats_remove(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + psset_hpdata_heap_remove(psset, pind, ps); } } void +psset_update_begin(psset_t *psset, hpdata_t *ps) { + hpdata_assert_consistent(ps); + assert(hpdata_in_psset_get(ps)); + hpdata_updating_set(ps, true); + psset_do_alloc_tracking_remove(psset, ps); +} + +void psset_update_end(psset_t *psset, hpdata_t *ps) { - assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); - assert(hpdata_updating_get(ps)); + assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, false); - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - if (longest_free_range == 0) { - /* - * We don't ned to track full slabs; just pretend to for stats - * purposes. See the comment at psset_bin_stats_adjust. - */ - psset_bin_stats_insert(psset->stats.full_slabs, ps); - return; - } - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - - assert(pind < PSSET_NPSIZES); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); - } - psset_hpdata_heap_insert(psset, pind, ps); + psset_do_alloc_tracking_insert(psset, ps); } hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { + assert((size & PAGE_MASK) == 0); + assert(size <= HUGEPAGE); + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); if (pind == PSSET_NPSIZES) { - return NULL; + return hpdata_empty_list_first(&psset->empty_slabs); } hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { @@ -144,3 +180,17 @@ psset_pick_alloc(psset_t *psset, size_t size) { return ps; } + +void +psset_insert(psset_t *psset, hpdata_t *ps) { + /* We only support inserting empty pageslabs, for now. 
*/ + assert(hpdata_empty(ps)); + hpdata_in_psset_set(ps, true); + psset_do_alloc_tracking_insert(psset, ps); +} + +void +psset_remove(psset_t *psset, hpdata_t *ps) { + hpdata_in_psset_set(ps, false); + psset_do_alloc_tracking_remove(psset, ps); +} diff --git a/src/stats.c b/src/stats.c index 355921c..7f56014 100644 --- a/src/stats.c +++ b/src/stats.c @@ -790,14 +790,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t row; emitter_row_init(&row); - uint64_t nevictions; uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; uint64_t ndehugifies; - CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", - i, &nevictions, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", @@ -839,7 +836,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_printf(emitter, "HPA shard stats:\n" - " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" @@ -850,7 +846,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), @@ -861,8 +856,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nretained_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); - emitter_json_kv(emitter, "nevictions", emitter_type_uint64, - &nevictions); emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index cf7b89f..688911a 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -7,8 +7,6 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); - /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { expect_true(hpdata_consistent(&hpdata), ""); @@ -61,8 +59,6 @@ TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); - void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -111,7 +107,6 @@ TEST_END TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); /* Allocate the first 3/4 of the pages. 
*/ void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); @@ -165,7 +160,6 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/psset.c b/test/unit/psset.c index 2043e4e..f5e1bad 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -16,17 +16,25 @@ edata_init_test(edata_t *edata) { } static void +test_psset_fake_purge(hpdata_t *ps) { + hpdata_purge_state_t purge_state; + hpdata_purge_begin(ps, &purge_state); + void *addr; + size_t size; + while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { + } + hpdata_purge_end(ps, &purge_state); +} + +static void test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); - /* - * As in hpa.c; pretend that the ps is already in the psset and just - * being updated, until we implement true insert/removal support. - */ - if (!hpdata_updating_get(ps)) { - hpdata_updating_set(ps, true); - } + test_psset_fake_purge(ps); + + psset_insert(psset, ps); + psset_update_begin(psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, @@ -59,10 +67,11 @@ test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); psset_update_begin(psset, ps); hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + psset_update_end(psset, ps); if (hpdata_empty(ps)) { + psset_remove(psset, ps); return ps; } else { - psset_update_end(psset, ps); return NULL; } } -- cgit v0.12 From 0ea3d6307cb7eb899c90b86e286ee7b8368f9bb7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 19:24:23 -0800 Subject: CTL, Stats: report HPA empty slab stats. --- src/ctl.c | 50 +++++++++++++++++++++++++++++++ src/stats.c | 98 +++++++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 123 insertions(+), 25 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 3cec637..feefa68 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -230,18 +230,34 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) + +/* We have a set of stats for full slabs. */ CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge) + +/* A parallel set for the empty slabs. */ +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge) + +/* + * And one for the slabs that are neither empty nor full, but indexed by how + * full they are. 
+ */ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) + INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -668,6 +684,21 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} }; +static const ctl_named_node_t stats_arenas_i_hpa_shard_empty_slabs_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)} +}; + static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, @@ -696,6 +727,8 @@ static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_empty_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, @@ -3539,6 +3572,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); +/* Empty, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, size_t); + +/* Empty, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, size_t); /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, diff --git a/src/stats.c b/src/stats.c index 7f56014..8e29656 100644 --- a/src/stats.c +++ b/src/stats.c @@ -813,6 +813,35 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t ndirty_nonhuge; size_t nretained_nonhuge; + size_t sec_bytes; + 
CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); + + /* First, global stats. */ + emitter_table_printf(emitter, + "HPA shard stats:\n" + " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + "\n", + npurge_passes, rate_per_second(npurge_passes, uptime), + npurges, rate_per_second(npurges, uptime), + nhugifies, rate_per_second(nhugifies, uptime), + ndehugifies, rate_per_second(ndehugifies, uptime)); + + emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, + &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, + &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, + &nhugifies); + emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, + &ndehugifies); + + /* Next, full slab stats. */ CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -829,42 +858,17 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; - size_t sec_bytes; - CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); - emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", - emitter_type_size, &sec_bytes); - emitter_table_printf(emitter, - "HPA shard stats:\n" - " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - "\n" " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npurge_passes, rate_per_second(npurge_passes, uptime), - npurges, rate_per_second(npurges, uptime), - nhugifies, rate_per_second(nhugifies, uptime), - ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ndirty_huge, ndirty_nonhuge, nretained_nonhuge); - emitter_json_object_kv_begin(emitter, "hpa_shard"); - emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, - &npurge_passes); - emitter_json_kv(emitter, "npurges", emitter_type_uint64, - &npurges); - emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, - &nhugifies); - emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, - &ndehugifies); - emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); @@ -880,6 +884,50 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ + /* Next, empty slab stats. 
*/ + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", + i, &ndirty_huge, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); + nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; + + emitter_table_printf(emitter, + " In empty slabs:\n" + " npageslabs: %zu huge, %zu nonhuge\n" + " nactive: %zu huge, %zu nonhuge \n" + " ndirty: %zu huge, %zu nonhuge \n" + " nretained: 0 huge, %zu nonhuge \n" + "\n", + npageslabs_huge, npageslabs_nonhuge, + nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, + nretained_nonhuge); + + emitter_json_object_kv_begin(emitter, "empty_slabs"); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); + emitter_json_object_end(emitter); /* End "empty_slabs" */ + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) -- cgit v0.12 From da63f23e68069e967e6759e2ffa578970243df9e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 09:49:26 -0800 Subject: HPA: Track pending purges/hugifies in the psset. This finishes the refactoring of the HPA/psset interactions the past few commits have been building towards. Rather than the HPA removing and then reinserting hpdatas, it simply begins updates and ends them. These updates can set flags on the hpdata that prevent it from being returned for certain types of requests. For example, it can call hpdata_alloc_allowed_set(hpdata, false) during an update, at which point the given hpdata will no longer be returned for psset_pick_alloc requests. This has various of benefits: - It maintains stats correctness during purges and hugifies. - It allows simpler and more explicit concurrency control for the various special cases (e.g. allocations are disallowed during purge, but not during hugify). - It lets allocations and deallocations avoid disturbing the purging and hugification orderings. If an hpdata "loses its place" in one of the queues just do to an alloc / dalloc, it can result in pathological edge cases where very hot, very full hugepages never get hugified (and cold extents on the same hugepage as hot ones never get purged). The key benefit though is that tracking hpdatas to be purged / hugified in a principled way will let us do delayed purging and hugification. Eventually this will let us move these operations to background threads, but in the short term the benefit is that it will let us have global purging policies (e.g. purge when the entire arena has too many dirty pages, rather than any particular hugepage). 
--- include/jemalloc/internal/hpdata.h | 137 +++++++++++++---- include/jemalloc/internal/psset.h | 15 +- src/hpa.c | 291 +++++++++++++++++++++---------------- src/hpdata.c | 45 +----- src/psset.c | 158 ++++++++++++++++---- test/unit/hpdata.c | 16 +- 6 files changed, 429 insertions(+), 233 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 393ed27..feca5f5 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -36,11 +36,30 @@ struct hpdata_s { bool h_huge; /* - * Whether or not some thread is purging this hpdata (i.e. has called - * hpdata_purge_begin but not yet called hpdata_purge_end), or - * hugifying it. Only one thread at a time is allowed to change a - * hugepage's state. + * For some properties, we keep parallel sets of bools; h_foo_allowed + * and h_in_psset_foo_container. This is a decoupling mechanism to + * avoid bothering the hpa (which manages policies) from the psset + * (which is the mechanism used to enforce those policies). This allows + * all the container management logic to live in one place, without the + * HPA needing to know or care how that happens. */ + + /* + * Whether or not the hpdata is allowed to be used to serve allocations, + * and whether or not the psset is currently tracking it as such. + */ + bool h_alloc_allowed; + bool h_in_psset_alloc_container; + + /* The same, but with purging. */ + bool h_purge_allowed; + bool h_in_psset_purge_container; + + /* And with hugifying. */ + bool h_hugify_allowed; + bool h_in_psset_hugify_container; + + /* Whether or not a purge or hugify is currently happening. */ bool h_mid_purge; bool h_mid_hugify; @@ -65,6 +84,12 @@ struct hpdata_s { ql_elm(hpdata_t) ql_link_empty; }; + /* + * Linkage for the psset to track candidates for purging and hugifying. + */ + ql_elm(hpdata_t) ql_link_purge; + ql_elm(hpdata_t) ql_link_hugify; + /* The length of the largest contiguous sequence of inactive pages. 
*/ size_t h_longest_free_range; @@ -86,6 +111,9 @@ struct hpdata_s { }; TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) +TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge) +TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify) + typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); @@ -116,8 +144,66 @@ hpdata_huge_get(const hpdata_t *hpdata) { } static inline bool -hpdata_changing_state_get(const hpdata_t *hpdata) { - return hpdata->h_mid_purge || hpdata->h_mid_hugify; +hpdata_alloc_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_alloc_allowed; +} + +static inline void +hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) { + hpdata->h_alloc_allowed = alloc_allowed; +} + +static inline bool +hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_alloc_container; +} + +static inline void +hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_alloc_container); + hpdata->h_in_psset_alloc_container = in_container; +} + +static inline bool +hpdata_purge_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_purge_allowed; +} + +static inline void +hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + hpdata->h_purge_allowed = purge_allowed; +} + +static inline bool +hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_purge_container; +} + +static inline void +hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_purge_container); + hpdata->h_in_psset_purge_container = in_container; +} + +static inline bool +hpdata_hugify_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_hugify_allowed; +} + +static inline void +hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { + hpdata->h_hugify_allowed = hugify_allowed; +} + +static inline bool +hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_hugify_container; +} + +static inline void +hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_hugify_container); + hpdata->h_in_psset_hugify_container = in_container; } static inline bool @@ -125,11 +211,29 @@ hpdata_mid_purge_get(const hpdata_t *hpdata) { return hpdata->h_mid_purge; } +static inline void +hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) { + assert(mid_purge != hpdata->h_mid_purge); + hpdata->h_mid_purge = mid_purge; +} + static inline bool hpdata_mid_hugify_get(const hpdata_t *hpdata) { return hpdata->h_mid_hugify; } +static inline void +hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) { + assert(mid_hugify != hpdata->h_mid_hugify); + hpdata->h_mid_hugify = mid_hugify; +} + +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + + static inline bool hpdata_updating_get(const hpdata_t *hpdata) { return hpdata->h_updating; @@ -278,26 +382,7 @@ bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, */ void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); -/* - * Similarly, when hugifying , callers can do the metadata modifications while - * holding a lock (thereby setting the change_state field), but actually do the - * operation without blocking other threads. 
- * - * Unlike most metadata operations, hugification ending should happen while an - * hpdata is in the psset (or upcoming hugepage collections). This is because - * while purge/use races are unsafe, purge/hugepageify races are perfectly - * reasonable. - */ -void hpdata_hugify_begin(hpdata_t *hpdata); -void hpdata_hugify_end(hpdata_t *hpdata); - -/* - * Tell the hpdata that it's no longer a hugepage (all its pages are still - * counted as dirty, though; an explicit purge call is required to change that). - * - * This should only be done after starting to purge, and before actually purging - * any contents. - */ +void hpdata_hugify(hpdata_t *hpdata); void hpdata_dehugify(hpdata_t *hpdata); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index b220609..6e08e8b 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -61,8 +61,15 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; - /* Slabs with no active allocations. */ - hpdata_empty_list_t empty_slabs; + /* + * Slabs with no active allocations, but which are allowed to serve new + * allocations. + */ + hpdata_empty_list_t empty; + /* Slabs which are available to be purged. */ + hpdata_purge_list_t to_purge; + /* Slabs which are available to be hugified. */ + hpdata_hugify_list_t to_hugify; }; void psset_init(psset_t *psset); @@ -77,6 +84,10 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); +/* Pick one to purge. */ +hpdata_t *psset_pick_purge(psset_t *psset); +/* Pick one to hugify. */ +hpdata_t *psset_pick_hugify(psset_t *psset); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); diff --git a/src/hpa.c b/src/hpa.c index 8f4642c..5dd34c3 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -227,65 +227,150 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { return ps; } -/* - * As a precondition, ps should not be in the psset (we can handle deallocation - * races, but not allocation ones), and we should hold the shard mutex. - */ -static void -hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { +/* Returns whether or not we purged anything. */ +static bool +hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - while (hpa_should_purge(shard, ps)) { - /* Do the metadata update bit while holding the lock. */ - hpdata_purge_state_t purge_state; - hpdata_purge_begin(ps, &purge_state); - shard->stats.npurge_passes++; - /* - * Dehugifying can only happen on the first loop iteration, - * since no other threads can allocate out of this ps while - * we're purging (and thus, can't hugify it), but there's not a - * natural way to express that in the control flow. - */ - bool needs_dehugify = false; - if (hpdata_huge_get(ps)) { - needs_dehugify = true; - shard->stats.ndehugifies++; - hpdata_dehugify(ps); - } - - /* Drop the lock to do the OS calls. 
*/ - malloc_mutex_unlock(tsdn, &shard->mtx); + hpdata_t *to_purge = psset_pick_purge(&shard->psset); + if (to_purge == NULL) { + return false; + } + assert(hpdata_purge_allowed_get(to_purge)); + assert(!hpdata_changing_state_get(to_purge)); - if (needs_dehugify) { - pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - } + /* + * Don't let anyone else purge or hugify this page while + * we're purging it (allocations and deallocations are + * OK). + */ + psset_update_begin(&shard->psset, to_purge); + assert(hpdata_alloc_allowed_get(to_purge)); + hpdata_mid_purge_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, false); + hpdata_hugify_allowed_set(to_purge, false); + /* + * Unlike with hugification (where concurrent + * allocations are allowed), concurrent allocation out + * of a hugepage being purged is unsafe; we might hand + * out an extent for an allocation and then purge it + * (clearing out user data). + */ + hpdata_alloc_allowed_set(to_purge, false); + psset_update_end(&shard->psset, to_purge); - size_t total_purged = 0; - uint64_t purges_this_pass = 0; - void *purge_addr; - size_t purge_size; - while (hpdata_purge_next(ps, &purge_state, &purge_addr, - &purge_size)) { - purges_this_pass++; - pages_purge_forced(purge_addr, purge_size); - total_purged += purge_size; - } + /* Gather all the metadata we'll need during the purge. */ + bool dehugify = hpdata_huge_get(to_purge); + hpdata_purge_state_t purge_state; + hpdata_purge_begin(to_purge, &purge_state); - /* Reacquire to finish our metadata update. */ - malloc_mutex_lock(tsdn, &shard->mtx); - shard->stats.npurges += purges_this_pass; - hpdata_purge_end(ps, &purge_state); + malloc_mutex_unlock(tsdn, &shard->mtx); + /* Actually do the purging, now that the lock is dropped. */ + if (dehugify) { + pages_nohuge(hpdata_addr_get(to_purge), HUGEPAGE); + } + size_t total_purged = 0; + uint64_t purges_this_pass = 0; + void *purge_addr; + size_t purge_size; + while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, + &purge_size)) { + total_purged += purge_size; assert(total_purged <= HUGEPAGE); + purges_this_pass++; + pages_purge_forced(purge_addr, purge_size); + } - /* - * We're not done here; other threads can't allocate out of ps - * while purging, but they can still deallocate. Those - * deallocations could have meant more purging than what we - * planned ought to happen. We have to re-check now that we've - * reacquired the mutex again. - */ + malloc_mutex_lock(tsdn, &shard->mtx); + /* The shard updates */ + shard->stats.npurge_passes++; + shard->stats.npurges += purges_this_pass; + if (dehugify) { + shard->stats.ndehugifies++; + } + + /* The hpdata updates. */ + psset_update_begin(&shard->psset, to_purge); + if (dehugify) { + hpdata_dehugify(to_purge); } + hpdata_purge_end(to_purge, &purge_state); + hpdata_mid_purge_set(to_purge, false); + + hpdata_alloc_allowed_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge)); + hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge)); + + psset_update_end(&shard->psset, to_purge); + + return true; +} + +/* Returns whether or not we hugified anything. 
*/ +static bool +hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify == NULL) { + return false; + } + assert(hpdata_hugify_allowed_get(to_hugify)); + assert(!hpdata_changing_state_get(to_hugify)); + + /* + * Don't let anyone else purge or hugify this page while + * we're hugifying it (allocations and deallocations are + * OK). + */ + psset_update_begin(&shard->psset, to_hugify); + hpdata_mid_hugify_set(to_hugify, true); + hpdata_purge_allowed_set(to_hugify, false); + hpdata_hugify_allowed_set(to_hugify, false); + assert(hpdata_alloc_allowed_get(to_hugify)); + psset_update_end(&shard->psset, to_hugify); + + malloc_mutex_unlock(tsdn, &shard->mtx); + + bool err = pages_huge(hpdata_addr_get(to_hugify), + HUGEPAGE); + /* + * It's not clear what we could do in case of error; we + * might get into situations where we loop trying to + * hugify some page and failing over and over again. + * Just eat the error and pretend we were successful. + */ + (void)err; + + malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.nhugifies++; + + psset_update_begin(&shard->psset, to_hugify); + hpdata_hugify(to_hugify); + hpdata_mid_hugify_set(to_hugify, false); + hpdata_purge_allowed_set(to_hugify, + hpa_should_purge(shard, to_hugify)); + hpdata_hugify_allowed_set(to_hugify, false); + psset_update_end(&shard->psset, to_hugify); + + return true; +} + + +static void +hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + bool hugified; + bool purged; + size_t nloop = 0; + /* Just *some* bound, to impose a worst-case latency bound. */ + size_t maxloops = 100;; + do { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hugified = hpa_try_hugify(tsdn, shard); + purged = hpa_try_purge(tsdn, shard); + malloc_mutex_assert_owner(tsdn, &shard->mtx); + } while ((hugified || purged) && nloop++ < maxloops); } static edata_t * @@ -344,6 +429,10 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a * little much to deal with for now. + * + * We don't have a do_deferred_work down this pathway, on the + * principle that we didn't *really* affect shard state (we + * tweaked the stats, but our tweaks weren't really accurate). */ psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); @@ -352,49 +441,14 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - bool hugify = hpa_should_hugify(shard, ps); - if (hugify) { - hpdata_hugify_begin(ps); - shard->stats.nhugifies++; + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); } psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); - if (hugify) { - /* - * Hugifying with the lock dropped is safe, even with - * concurrent modifications to the ps. This relies on - * the fact that the current implementation will never - * dehugify a non-empty pageslab, and ps will never - * become empty before we return edata to the user to be - * freed. - * - * Note that holding the lock would prevent not just operations - * on this page slab, but also operations any other alloc/dalloc - * operations in this hpa shard. 
- */ - bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); - /* - * Pretending we succeed when we actually failed is safe; trying - * to rolllback would be tricky, though. Eat the error. - */ - (void)err; - malloc_mutex_lock(tsdn, &shard->mtx); - hpdata_hugify_end(ps); - if (hpa_should_purge(shard, ps)) { - /* - * There was a race in which the ps went from being - * almost full to having lots of free space while we - * hugified. Undo our operation, taking care to meet - * the precondition that the ps isn't in the psset. - */ - psset_update_begin(&shard->psset, ps); - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); - } - malloc_mutex_unlock(tsdn, &shard->mtx); - } return edata; } @@ -445,6 +499,14 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { return NULL; } + /* + * TODO: the tail of this function is quite similar to the tail of + * hpa_try_alloc_no_grow (both, broadly, do the metadata management of + * initializing an edata_t from an hpdata_t once both have been + * allocated). The only differences are in error case handling and lock + * management (we hold grow_mtx, but should drop it before doing any + * deferred work). With a little refactoring, we could unify the paths. + */ psset_update_begin(&shard->psset, ps); void *addr = hpdata_reserve_alloc(ps, size); @@ -481,10 +543,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); + } psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); + /* + * Drop grow_mtx before doing deferred work; other threads blocked on it + * should be allowed to proceed while we're working. + */ malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + hpa_do_deferred_work(tsdn, shard); + + malloc_mutex_unlock(tsdn, &shard->mtx); return edata; } @@ -579,48 +651,15 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); - /* - * We have three rules interacting here: - * - You can't update ps metadata while it's still in the psset. We - * enforce this because it's necessary for stats tracking and metadata - * management. - * - The ps must not be in the psset while purging. This is because we - * can't handle purge/alloc races. - * - Whoever removes the ps from the psset is the one to reinsert it. - */ - if (hpdata_mid_purge_get(ps)) { - /* - * Another thread started purging, and so the ps is not in the - * psset and we can do our metadata update. The other thread is - * in charge of reinserting the ps, so we're done. - */ - assert(hpdata_updating_get(ps)); - hpdata_unreserve(ps, unreserve_addr, unreserve_size); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; - } - /* - * No other thread is purging, and the ps is non-empty, so it should be - * in the psset. - */ - assert(!hpdata_updating_get(ps)); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - if (!hpa_should_purge(shard, ps)) { - /* - * This should be the common case; no other thread is purging, - * and we won't purge either. - */ - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; + if (hpa_should_purge(shard, ps)) { + hpdata_purge_allowed_set(ps, true); } - - /* It's our job to purge. 
*/ - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/src/hpdata.c b/src/hpdata.c index 0cfeeed..bb4808a 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_alloc_allowed = true; + hpdata->h_in_psset_alloc_container = false; + hpdata->h_purge_allowed = false; + hpdata->h_in_psset_purge_container = false; + hpdata->h_hugify_allowed = false; + hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; hpdata->h_updating = false; @@ -44,6 +50,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * mid-update. */ assert(!hpdata->h_in_psset || hpdata->h_updating); + assert(hpdata->h_alloc_allowed); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -155,10 +162,6 @@ void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = true; purge_state->npurged = 0; purge_state->next_purge_search_begin = 0; @@ -192,12 +195,6 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * hpdata without synchronization, and therefore have no right to expect * a consistent state. */ - assert(hpdata->h_mid_purge); - /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - /* Should have dehugified already (if necessary). */ - assert(!hpdata->h_huge); - assert(!hpdata->h_mid_hugify); if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { return false; @@ -226,9 +223,6 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = false; assert(purge_state->npurged == fb_scount(purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); @@ -244,13 +238,8 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { } void -hpdata_hugify_begin(hpdata_t *hpdata) { +hpdata_hugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ntouched = HUGEPAGE_PAGES; @@ -258,26 +247,8 @@ hpdata_hugify_begin(hpdata_t *hpdata) { } void -hpdata_hugify_end(hpdata_t *hpdata) { - hpdata_assert_consistent(hpdata); - /* - * This is the exception to the "no-metadata updates without informing - * the psset first" rule; this assert would be incorrect. 
- */ - /* assert(!hpdata->h_in_psset || hpdata->h_updating); */ - assert(!hpdata->h_mid_purge); - assert(hpdata->h_mid_hugify); - hpdata->h_mid_hugify = false; - hpdata_assert_consistent(hpdata); -} - -void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); - assert(hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/src/psset.c b/src/psset.c index 8997102..bb51e21 100644 --- a/src/psset.c +++ b/src/psset.c @@ -15,7 +15,9 @@ psset_init(psset_t *psset) { } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); - hpdata_empty_list_init(&psset->empty_slabs); + hpdata_empty_list_init(&psset->empty); + hpdata_purge_list_init(&psset->to_purge); + hpdata_hugify_list_init(&psset->to_hugify); } static void @@ -85,25 +87,56 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } +static void +psset_stats_insert(psset_t* psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_insert(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_insert(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + } +} + +static void +psset_stats_remove(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_remove(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_remove(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + } +} + /* - * Insert ps into the data structures we use to track allocation stats and pick - * the pageslabs for new allocations. - * - * In particular, this does *not* remove ps from any hugification / purging - * queues it may be in. + * Put ps into some container so that it can be found during future allocation + * requests. */ static void -psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { +psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, true); if (hpdata_empty(ps)) { - psset_bin_stats_insert(psset->stats.empty_slabs, ps); /* * This prepend, paired with popping the head in psset_fit, * means we implement LIFO ordering for the empty slabs set, * which seems reasonable. */ - hpdata_empty_list_prepend(&psset->empty_slabs, ps); + hpdata_empty_list_prepend(&psset->empty, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset->stats.full_slabs, ps); /* * We don't need to keep track of the full slabs; we're never * going to return them from a psset_pick_alloc call. @@ -115,23 +148,20 @@ psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_insert(psset, pind, ps); } } /* Remove ps from those collections. 
*/ static void -psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { +psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { + assert(hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, false); + if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset->stats.empty_slabs, ps); - hpdata_empty_list_remove(&psset->empty_slabs, ps); + hpdata_empty_list_remove(&psset->empty, ps); } else if (hpdata_full(ps)) { - /* - * We don't need to maintain an explicit container of full - * pageslabs anywhere, but we do have to update stats. - */ - psset_bin_stats_remove(psset->stats.full_slabs, ps); + /* Same as above -- do nothing in this case. */ } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -139,7 +169,6 @@ psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_remove(psset, pind, ps); } } @@ -149,7 +178,21 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, true); - psset_do_alloc_tracking_remove(psset, ps); + psset_stats_remove(psset, ps); + if (hpdata_in_psset_alloc_container_get(ps)) { + /* + * Some metadata updates can break alloc container invariants + * (e.g. the longest free range determines the hpdata_heap_t the + * pageslab lives in). + */ + assert(hpdata_alloc_allowed_get(ps)); + psset_alloc_container_remove(psset, ps); + } + /* + * We don't update presence in the purge list or hugify list; we try to + * keep those FIFO, even in the presence of other metadata updates. + * We'll update presence at the end of the metadata update if necessary. + */ } void @@ -157,7 +200,36 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, false); - psset_do_alloc_tracking_insert(psset, ps); + psset_stats_insert(psset, ps); + + /* + * The update begin should have removed ps from whatever alloc container + * it was in. 
+	 */
+	assert(!hpdata_in_psset_alloc_container_get(ps));
+	if (hpdata_alloc_allowed_get(ps)) {
+		psset_alloc_container_insert(psset, ps);
+	}
+
+	if (hpdata_purge_allowed_get(ps)
+	    && !hpdata_in_psset_purge_container_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, true);
+		hpdata_purge_list_append(&psset->to_purge, ps);
+	} else if (!hpdata_purge_allowed_get(ps)
+	    && hpdata_in_psset_purge_container_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, false);
+		hpdata_purge_list_remove(&psset->to_purge, ps);
+	}
+
+	if (hpdata_hugify_allowed_get(ps)
+	    && !hpdata_in_psset_hugify_container_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, true);
+		hpdata_hugify_list_append(&psset->to_hugify, ps);
+	} else if (!hpdata_hugify_allowed_get(ps)
+	    && hpdata_in_psset_hugify_container_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, false);
+		hpdata_hugify_list_remove(&psset->to_hugify, ps);
+	}
 }
 
 hpdata_t *
@@ -169,7 +241,7 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 	pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info,
 	    (size_t)min_pind);
 	if (pind == PSSET_NPSIZES) {
-		return hpdata_empty_list_first(&psset->empty_slabs);
+		return hpdata_empty_list_first(&psset->empty);
 	}
 	hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
 	if (ps == NULL) {
@@ -181,16 +253,48 @@ psset_pick_alloc(psset_t *psset, size_t size) {
 	return ps;
 }
 
+hpdata_t *
+psset_pick_purge(psset_t *psset) {
+	return hpdata_purge_list_first(&psset->to_purge);
+}
+
+hpdata_t *
+psset_pick_hugify(psset_t *psset) {
+	return hpdata_hugify_list_first(&psset->to_hugify);
+}
+
 void
 psset_insert(psset_t *psset, hpdata_t *ps) {
-	/* We only support inserting empty pageslabs, for now. */
-	assert(hpdata_empty(ps));
 	hpdata_in_psset_set(ps, true);
-	psset_do_alloc_tracking_insert(psset, ps);
+
+	psset_stats_insert(psset, ps);
+	if (hpdata_alloc_allowed_get(ps)) {
+		psset_alloc_container_insert(psset, ps);
+	}
+	if (hpdata_purge_allowed_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, true);
+		hpdata_purge_list_append(&psset->to_purge, ps);
+	}
+	if (hpdata_hugify_allowed_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, true);
+		hpdata_hugify_list_append(&psset->to_hugify, ps);
+	}
 }
 
 void
 psset_remove(psset_t *psset, hpdata_t *ps) {
 	hpdata_in_psset_set(ps, false);
-	psset_do_alloc_tracking_remove(psset, ps);
+
+	psset_stats_remove(psset, ps);
+	if (hpdata_in_psset_alloc_container_get(ps)) {
+		psset_alloc_container_remove(psset, ps);
+	}
+	if (hpdata_in_psset_purge_container_get(ps)) {
+		hpdata_in_psset_purge_container_set(ps, false);
+		hpdata_purge_list_remove(&psset->to_purge, ps);
+	}
+	if (hpdata_in_psset_hugify_container_get(ps)) {
+		hpdata_in_psset_hugify_container_set(ps, false);
+		hpdata_hugify_list_remove(&psset->to_hugify, ps);
+	}
 }
diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c
index 688911a..2a70233 100644
--- a/test/unit/hpdata.c
+++ b/test/unit/hpdata.c
@@ -67,13 +67,9 @@ TEST_BEGIN(test_purge_simple) {
 
 	expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, "");
 
-	expect_false(hpdata_changing_state_get(&hpdata), "");
-
 	hpdata_purge_state_t purge_state;
 	hpdata_purge_begin(&hpdata, &purge_state);
 
-	expect_true(hpdata_changing_state_get(&hpdata), "");
-
 	void *purge_addr;
 	size_t purge_size;
 	bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr,
@@ -82,17 +78,12 @@ TEST_BEGIN(test_purge_simple) {
 	expect_ptr_eq(HPDATA_ADDR, purge_addr, "");
 	expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, "");
 
-	expect_true(hpdata_changing_state_get(&hpdata), "");
-
got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); expect_false(got_result, "Unexpected additional purge range: " "extent at %p of size %zu", purge_addr, purge_size); - expect_true(hpdata_changing_state_get(&hpdata), ""); - hpdata_purge_end(&hpdata, &purge_state); - expect_false(hpdata_changing_state_get(&hpdata), ""); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -166,12 +157,7 @@ TEST_BEGIN(test_hugify) { expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ntouched_get(&hpdata), ""); - expect_false(hpdata_changing_state_get(&hpdata), ""); - hpdata_hugify_begin(&hpdata); - expect_true(hpdata_changing_state_get(&hpdata), ""); - - hpdata_hugify_end(&hpdata); - expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify(&hpdata); /* Hugeifying should have increased the dirty page count. */ expect_zu_eq(HUGEPAGE_PAGES, hpdata_ntouched_get(&hpdata), ""); -- cgit v0.12 From 9fd9c876bb99acc957f8ec411837138a9b588a1e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 12:49:03 -0800 Subject: psset: keep aggregate stats. This will let us quickly query these stats to make purging decisions quickly. --- include/jemalloc/internal/psset.h | 24 +++++++++++++++--- src/psset.c | 52 ++++++++++++++++++++++++++++++--------- test/unit/psset.c | 1 + 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 6e08e8b..d2a8b24 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -8,9 +8,6 @@ * a collection of page-slabs (the intent being that they are backed by * hugepages, or at least could be), and handles allocation and deallocation * requests. - * - * It has the same synchronization guarantees as the eset; stats queries don't - * need any external synchronization, everything else does. */ /* @@ -60,6 +57,12 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* + * The sum of all bin stats in stats. This lets us quickly answer + * queries for the number of dirty, active, and retained pages in the + * entire set. + */ + psset_bin_stats_t merged_stats; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new @@ -92,4 +95,19 @@ hpdata_t *psset_pick_hugify(psset_t *psset); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); +static inline size_t +psset_npageslabs(psset_t *psset) { + return psset->merged_stats.npageslabs; +} + +static inline size_t +psset_nactive(psset_t *psset) { + return psset->merged_stats.nactive; +} + +static inline size_t +psset_ndirty(psset_t *psset) { + return psset->merged_stats.ndirty; +} + #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/psset.c b/src/psset.c index bb51e21..66d3739 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,6 +14,7 @@ psset_init(psset_t *psset) { hpdata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); hpdata_purge_list_init(&psset->to_purge); @@ -52,23 +53,48 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. 
*/ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, - bool insert) { +psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps, bool insert) { size_t mul = insert ? (size_t)1 : (size_t)-1; size_t huge_idx = (size_t)hpdata_huge_get(ps); + binstats[huge_idx].npageslabs += mul * 1; binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); + + psset->merged_stats.npageslabs += mul * 1; + psset->merged_stats.nactive += mul * hpdata_nactive_get(ps); + psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps); + + if (config_debug) { + psset_bin_stats_t check_stats = {0}; + for (size_t huge = 0; huge <= 1; huge++) { + psset_bin_stats_accum(&check_stats, + &psset->stats.full_slabs[huge]); + psset_bin_stats_accum(&check_stats, + &psset->stats.empty_slabs[huge]); + for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { + psset_bin_stats_accum(&check_stats, + &psset->stats.nonfull_slabs[pind][huge]); + } + } + assert(psset->merged_stats.npageslabs + == check_stats.npageslabs); + assert(psset->merged_stats.nactive == check_stats.nactive); + assert(psset->merged_stats.ndirty == check_stats.ndirty); + } } static void -psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, true); +psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps) { + psset_bin_stats_insert_remove(psset, binstats, ps, true); } static void -psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, false); +psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps) { + psset_bin_stats_insert_remove(psset, binstats, ps, false); } static void @@ -90,9 +116,9 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { static void psset_stats_insert(psset_t* psset, hpdata_t *ps) { if (hpdata_empty(ps)) { - psset_bin_stats_insert(psset->stats.empty_slabs, ps); + psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset->stats.full_slabs, ps); + psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -100,16 +126,17 @@ psset_stats_insert(psset_t* psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], + ps); } } static void psset_stats_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset->stats.empty_slabs, ps); + psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); + psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -117,7 +144,8 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], + ps); } } diff --git a/test/unit/psset.c b/test/unit/psset.c index f5e1bad..b93dfbf 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -374,6 +374,7 @@ stats_expect(psset_t *psset, size_t nactive) { 
stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } } + expect_zu_eq(nactive, psset_nactive(psset), ""); } TEST_BEGIN(test_stats) { -- cgit v0.12 From dc886e5608d553ff2b8f2538cb8d6595bc90e9ac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 13:16:51 -0800 Subject: hpdata: Return the number of pages to be purged. We'll use this in the next commit. --- include/jemalloc/internal/hpdata.h | 5 ++++- src/hpdata.c | 7 +++++-- test/unit/hpdata.c | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index feca5f5..30dd672 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -363,8 +363,11 @@ struct hpdata_purge_state_s { * Once you begin purging, you have to follow through and call hpdata_purge_next * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. + * + * Returns the number of pages that will be purged. */ -void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); + /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to * true, and returns true. Otherwise, returns false to indicate that we're diff --git a/src/hpdata.c b/src/hpdata.c index bb4808a..e11ba8d 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -158,7 +158,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); } -void +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ @@ -181,10 +181,13 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata->touched_pages, HUGEPAGE_PAGES); /* We purge everything we can. */ - assert(hpdata->h_ntouched - hpdata->h_nactive == fb_scount( + size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive; + assert(to_purge == fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); + + return to_purge; } bool diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2a70233..11bccc5 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -68,7 +68,8 @@ TEST_BEGIN(test_purge_simple) { expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); hpdata_purge_state_t purge_state; - hpdata_purge_begin(&hpdata, &purge_state); + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); void *purge_addr; size_t purge_size; @@ -112,7 +113,8 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); hpdata_purge_state_t purge_state; - hpdata_purge_begin(&hpdata, &purge_state); + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); void *purge_addr; size_t purge_size; -- cgit v0.12 From 56e85c0e47f0a4a19cc0f6c71771ece69ef10080 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 13:48:46 -0800 Subject: HPA: Use a whole-shard purging heuristic. Previously, we used only hpdata-local information to decide whether to purge. 
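Concretely, the trigger is now a shard-global ratio: purge whenever the dirty
pages that are not already being purged exceed a quarter of the active pages,
as reported by the psset's aggregate counters. The standalone sketch below
mirrors the arithmetic of hpa_should_purge() in this patch; the helper name,
the main() driver, and the hard-coded page counts are illustrative only.
Subtracting the pending-purge count keeps purges that are already in flight
from re-triggering the heuristic.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for psset_ndirty()/psset_nactive() + npending_purge. */
static bool
should_purge_sketch(size_t ndirty, size_t nactive, size_t npending_purge) {
	size_t adjusted_ndirty = ndirty - npending_purge;
	/* Purge whenever dirty exceeds 25% of active. */
	return adjusted_ndirty > nactive / 4;
}

int
main(void) {
	/* 300 dirty, 40 already being purged, 1000 active: 260 > 250, purge. */
	printf("%d\n", should_purge_sketch(300, 1000, 40));
	/* 240 dirty, none pending, 1000 active: 240 <= 250, don't purge. */
	printf("%d\n", should_purge_sketch(240, 1000, 0));
	return 0;
}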
--- include/jemalloc/internal/hpa.h | 6 +++ include/jemalloc/internal/hpdata.h | 14 ++++++ src/hpa.c | 98 +++++++++++++++++++++++++------------- src/psset.c | 2 +- 4 files changed, 86 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index f62c327..de9cc75 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -94,6 +94,12 @@ struct hpa_shard_s { emap_t *emap; /* + * How many pages have we started but not yet finished purging in this + * hpa shard. + */ + size_t npending_purge; + + /* * Those stats which are copied directly into the CTL-centric hpa shard * stats. */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 30dd672..e489e62 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -171,6 +171,7 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + assert(purge_allowed == false || !hpdata->h_mid_purge); hpdata->h_purge_allowed = purge_allowed; } @@ -192,6 +193,7 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { + assert(hugify_allowed == false || !hpdata->h_mid_hugify); hpdata->h_hugify_allowed = hugify_allowed; } @@ -313,6 +315,18 @@ hpdata_consistent(hpdata_t *hpdata) { if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { return false; } + if (hpdata_changing_state_get(hpdata) + && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + return false; + } + if (hpdata_purge_allowed_get(hpdata) + != hpdata_in_psset_purge_container_get(hpdata)) { + return false; + } + if (hpdata_hugify_allowed_get(hpdata) + != hpdata_in_psset_hugify_container_get(hpdata)) { + return false; + } return true; } diff --git a/src/hpa.c b/src/hpa.c index 5dd34c3..cd0e803 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,6 +74,8 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->ind = ind; shard->emap = emap; + shard->npending_purge = 0; + shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; @@ -141,26 +143,58 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { } static bool -hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { - if (hpdata_changing_state_get(ps) || hpdata_huge_get(ps)) { - return false; - } - return hpa_good_hugification_candidate(shard, ps); +hpa_should_purge(hpa_shard_t *shard) { + size_t adjusted_ndirty = psset_ndirty(&shard->psset) + - shard->npending_purge; + /* + * Another simple static check; purge whenever dirty exceeds 25% of + * active. + */ + return adjusted_ndirty > psset_nactive(&shard->psset) / 4; } -/* - * Whether or not the given pageslab meets the criteria for being purged (and, - * if necessary, dehugified). - */ -static bool -hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { - /* Ditto. */ +static void +hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { if (hpdata_changing_state_get(ps)) { - return false; + hpdata_purge_allowed_set(ps, false); + hpdata_hugify_allowed_set(ps, false); + return; + } + /* + * Hugepages are distinctly costly to purge, so do it only if they're + * *particularly* full of dirty pages. Eventually, we should use a + * smarter / more dynamic heuristic for situations where we have to + * manually hugify. + * + * In situations where we don't manually hugify, this problem is + * reduced. 
The "bad" situation we're trying to avoid is one's that's + * common in some Linux configurations (where both enabled and defrag + * are set to madvise) that can lead to long latency spikes on the first + * access after a hugification. The ideal policy in such configurations + * is probably time-based for both purging and hugifying; only hugify a + * hugepage if it's met the criteria for some extended period of time, + * and only dehugify it if it's failed to meet the criteria for an + * extended period of time. When background threads are on, we should + * try to take this hit on one of them, as well. + * + * I think the ideal setting is THP always enabled, and defrag set to + * deferred; in that case we don't need any explicit calls on the + * allocator's end at all; we just try to pack allocations in a + * hugepage-friendly manner and let the OS hugify in the background. + * + * Anyways, our strategy to delay dehugification is to only consider + * purging a hugified hugepage if it's individually dirtier than the + * overall max dirty pages setting. That setting is 1 dirty page per 4 + * active pages; i.e. 4/5s of hugepage pages must be active. + */ + if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) + || hpdata_ndirty_get(ps) > HUGEPAGE_PAGES / 5) { + hpdata_purge_allowed_set(ps, true); + } + if (hpa_good_hugification_candidate(shard, ps) + && !hpdata_huge_get(ps)) { + hpdata_hugify_allowed_set(ps, true); } - size_t purgeable = hpdata_ndirty_get(ps); - return purgeable > HUGEPAGE_PAGES * 25 / 100 - || (purgeable > 0 && hpdata_empty(ps)); } static hpdata_t * @@ -262,7 +296,9 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Gather all the metadata we'll need during the purge. */ bool dehugify = hpdata_huge_get(to_purge); hpdata_purge_state_t purge_state; - hpdata_purge_begin(to_purge, &purge_state); + size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state); + + shard->npending_purge += num_to_purge; malloc_mutex_unlock(tsdn, &shard->mtx); @@ -284,6 +320,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ + shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; if (dehugify) { @@ -299,8 +336,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_mid_purge_set(to_purge, false); hpdata_alloc_allowed_set(to_purge, true); - hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge)); - hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge)); + hpa_update_purge_hugify_eligibility(shard, to_purge); psset_update_end(&shard->psset, to_purge); @@ -349,15 +385,12 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); hpdata_mid_hugify_set(to_hugify, false); - hpdata_purge_allowed_set(to_hugify, - hpa_should_purge(shard, to_hugify)); - hpdata_hugify_allowed_set(to_hugify, false); + hpa_update_purge_hugify_eligibility(shard, to_hugify); psset_update_end(&shard->psset, to_hugify); return true; } - static void hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { bool hugified; @@ -368,7 +401,11 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { do { malloc_mutex_assert_owner(tsdn, &shard->mtx); hugified = hpa_try_hugify(tsdn, shard); - purged = hpa_try_purge(tsdn, shard); + + purged = false; + if (hpa_should_purge(shard)) { + purged = hpa_try_purge(tsdn, shard); + } malloc_mutex_assert_owner(tsdn, &shard->mtx); } while ((hugified || purged) && 
nloop++ < maxloops); } @@ -441,9 +478,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - if (hpa_should_hugify(shard, ps)) { - hpdata_hugify_allowed_set(ps, true); - } + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); hpa_do_deferred_work(tsdn, shard); @@ -543,9 +578,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } - if (hpa_should_hugify(shard, ps)) { - hpdata_hugify_allowed_set(ps, true); - } + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); /* @@ -653,9 +686,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - if (hpa_should_purge(shard, ps)) { - hpdata_purge_allowed_set(ps, true); - } + + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); hpa_do_deferred_work(tsdn, shard); diff --git a/src/psset.c b/src/psset.c index 66d3739..08c9b6c 100644 --- a/src/psset.c +++ b/src/psset.c @@ -225,7 +225,6 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { void psset_update_end(psset_t *psset, hpdata_t *ps) { - hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, false); psset_stats_insert(psset, ps); @@ -258,6 +257,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_in_psset_hugify_container_set(ps, false); hpdata_hugify_list_remove(&psset->to_hugify, ps); } + hpdata_assert_consistent(ps); } hpdata_t * -- cgit v0.12 From caef4c2868fce6b0cc0087c20ba00a5d50b67c3a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 13:22:59 -0800 Subject: FXP: add fxp_mul_frac. This can multiply size_ts by a fraction without the risk of overflow. --- include/jemalloc/internal/fxp.h | 25 +++++++++++++++++++++++++ test/unit/fxp.c | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index d943809..b9803a6 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -91,6 +91,31 @@ fxp_round_nearest(fxp_t a) { } /* + * Approximately computes x * frac, without the size limitations that would be + * imposed by converting u to an fxp_t. + */ +static inline size_t +fxp_mul_frac(size_t x_orig, fxp_t frac) { + assert(frac <= (1U << 16)); + /* + * Work around an over-enthusiastic warning about type limits below (on + * 32-bit platforms, a size_t is always less than 1ULL << 48). + */ + uint64_t x = (uint64_t)x_orig; + /* + * If we can guarantee no overflow, multiply first before shifting, to + * preserve some precision. Otherwise, shift first and then multiply. + * In the latter case, we only lose the low 16 bits of a 48-bit number, + * so we're still accurate to within 1/2**32. + */ + if (x < (1ULL << 48)) { + return (size_t)((x * frac) >> 16); + } else { + return (size_t)((x >> 16) * (uint64_t)frac); + } +} + +/* * Returns true on error. Otherwise, returns false and updates *ptr to point to * the first character not parsed (because it wasn't a digit). 
*/ diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 89f0ca6..0fe5d67 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -223,6 +223,30 @@ TEST_BEGIN(test_round_simple) { TEST_END static void +expect_mul_frac(size_t a, const char *fracstr, size_t expected) { + fxp_t frac = xparse_fxp(fracstr); + size_t result = fxp_mul_frac(a, frac); + expect_true(double_close(expected, result), + "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, + expected, result); +} + +TEST_BEGIN(test_mul_frac_simple) { + expect_mul_frac(SIZE_MAX, "1.0", SIZE_MAX); + expect_mul_frac(SIZE_MAX, ".75", SIZE_MAX / 4 * 3); + expect_mul_frac(SIZE_MAX, ".5", SIZE_MAX / 2); + expect_mul_frac(SIZE_MAX, ".25", SIZE_MAX / 4); + expect_mul_frac(1U << 16, "1.0", 1U << 16); + expect_mul_frac(1U << 30, "0.5", 1U << 29); + expect_mul_frac(1U << 30, "0.25", 1U << 28); + expect_mul_frac(1U << 30, "0.125", 1U << 27); + expect_mul_frac((1U << 30) + 1, "0.125", 1U << 27); + expect_mul_frac(100, "0.25", 25); + expect_mul_frac(1000 * 1000, "0.001", 1000); +} +TEST_END + +static void expect_print(const char *str) { fxp_t fxp = xparse_fxp(str); char buf[FXP_BUF_SIZE]; @@ -339,6 +363,7 @@ main(void) { test_mul_simple, test_div_simple, test_round_simple, + test_mul_frac_simple, test_print_simple, test_stress); } -- cgit v0.12 From bdb7307ff28cdee92861a32ecae16919cc9af614 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 15:28:28 -0800 Subject: fxp: Add FXP_INIT_PERCENT This lets us specify fxp values easily in source. --- include/jemalloc/internal/fxp.h | 1 + test/unit/fxp.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index b9803a6..415a982 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -22,6 +22,7 @@ */ typedef uint32_t fxp_t; #define FXP_INIT_INT(x) ((x) << 16) +#define FXP_INIT_PERCENT(pct) (((pct) << 16) / 100) /* * Amount of precision used in parsing and printing numbers. The integer bound diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 0fe5d67..27f1097 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -96,7 +96,8 @@ TEST_BEGIN(test_parse_valid) { } TEST_END -static void expect_parse_failure(const char *str) { +static void +expect_parse_failure(const char *str) { fxp_t result = FXP_INIT_INT(333); char *end = (void *)0x123; bool err = fxp_parse(&result, str, &end); @@ -121,6 +122,29 @@ TEST_BEGIN(test_parse_invalid) { TEST_END static void +expect_init_percent(unsigned percent, const char *str) { + fxp_t result_init = FXP_INIT_PERCENT(percent); + fxp_t result_parse = xparse_fxp(str); + expect_u32_eq(result_init, result_parse, + "Expect representations of FXP_INIT_PERCENT(%u) and " + "fxp_parse(\"%s\") to be equal; got %x and %x", + percent, str, result_init, result_parse); + +} + +/* + * Every other test uses either parsing or FXP_INIT_INT; it gets tested in those + * ways. We need a one-off for the percent-based initialization, though. 
+ */ +TEST_BEGIN(test_init_percent) { + expect_init_percent(100, "1"); + expect_init_percent(75, ".75"); + expect_init_percent(1, ".01"); + expect_init_percent(50, ".5"); +} +TEST_END + +static void expect_add(const char *astr, const char *bstr, const char* resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); @@ -358,6 +382,7 @@ main(void) { return test_no_reentrancy( test_parse_valid, test_parse_invalid, + test_init_percent, test_add_simple, test_sub_simple, test_mul_simple, -- cgit v0.12 From b3df80bc797f1578b0f51a6919e18049663ffae1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 16:33:39 -0800 Subject: Pull HPA options into a containing struct. Currently that just means max_alloc, but we're about to add more. While we're touching these lines anyways, tweak things to be more in line with testing. --- include/jemalloc/internal/hpa.h | 15 +++++-------- include/jemalloc/internal/hpa_opts.h | 25 ++++++++++++++++++++++ .../jemalloc/internal/jemalloc_internal_externs.h | 4 ++-- include/jemalloc/internal/pa.h | 2 +- src/arena.c | 4 ++-- src/ctl.c | 2 +- src/hpa.c | 9 ++++---- src/jemalloc.c | 8 +++---- src/pa.c | 6 +++--- test/unit/hpa.c | 5 ++++- 10 files changed, 52 insertions(+), 28 deletions(-) create mode 100644 include/jemalloc/internal/hpa_opts.h diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index de9cc75..778d1c9 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_HPA_H #include "jemalloc/internal/exp_grow.h" -#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -65,14 +65,6 @@ struct hpa_shard_s { psset_t psset; /* - * The largest size we'll allocate out of the shard. For those - * allocations refused, the caller (in practice, the PA module) will - * fall back to the more general (for now) PAC, which can always handle - * any allocation request. - */ - size_t alloc_max; - - /* * How many grow operations have occurred. * * Guarded by grow_mtx. @@ -93,6 +85,9 @@ struct hpa_shard_s { unsigned ind; emap_t *emap; + /* The configuration choices for this hpa shard. */ + hpa_shard_opts_t opts; + /* * How many pages have we started but not yet finished purging in this * hpa shard. @@ -113,7 +108,7 @@ struct hpa_shard_s { */ bool hpa_supported(); bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); + edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h new file mode 100644 index 0000000..95e86b4 --- /dev/null +++ b/include/jemalloc/internal/hpa_opts.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_HPA_OPTS_H +#define JEMALLOC_INTERNAL_HPA_OPTS_H + +/* + * This file is morally part of hpa.h, but is split out for header-ordering + * reasons. + */ + +typedef struct hpa_shard_opts_s hpa_shard_opts_t; +struct hpa_shard_opts_s { + /* + * The largest size we'll allocate out of the shard. For those + * allocations refused, the caller (in practice, the PA module) will + * fall back to the more general (for now) PAC, which can always handle + * any allocation request. 
+ */ + size_t slab_max_alloc; +}; + +#define HPA_SHARD_OPTS_DEFAULT { \ + /* slab_max_alloc */ \ + 64 * 1024 \ +} + +#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index c78db06..166c91d 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/nstime.h" @@ -14,8 +15,7 @@ extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; -extern size_t opt_hpa_slab_max_alloc; - +extern hpa_shard_opts_t opt_hpa_opts; extern size_t opt_hpa_sec_max_alloc; extern size_t opt_hpa_sec_max_bytes; extern size_t opt_hpa_sec_nshards; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b903022..6ded54f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,7 +130,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, +bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still diff --git a/src/arena.c b/src/arena.c index 3448160..da0f1f0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1480,8 +1480,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, - opt_hpa_slab_max_alloc, opt_hpa_sec_nshards, - opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { + &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, + opt_hpa_sec_max_bytes)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index feefa68..195a46e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2090,7 +2090,7 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index cd0e803..dd9be5a 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -48,7 +48,7 @@ hpa_supported() { bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, size_t alloc_max) { + edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. 
*/ assert(hpa_supported()); bool err; @@ -67,13 +67,14 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->base = base; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); - shard->alloc_max = alloc_max; shard->age_counter = 0; shard->eden = NULL; shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->opts = *opts; + shard->npending_purge = 0; shard->stats.npurge_passes = 0; @@ -489,7 +490,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= shard->alloc_max); + assert(size <= shard->opts.slab_max_alloc); bool err; bool oom; edata_t *edata; @@ -614,7 +615,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, if (alignment > PAGE || zero) { return NULL; } - if (size > shard->alloc_max) { + if (size > shard->opts.slab_max_alloc) { return NULL; } diff --git a/src/jemalloc.c b/src/jemalloc.c index ca8a7de..d1b09dd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -144,7 +144,7 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_max_alloc = 256 * 1024; +hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; size_t opt_hpa_sec_max_alloc = 32 * 1024; /* These settings correspond to a maximum of 1MB cached per arena. */ @@ -1410,8 +1410,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, - "hpa_slab_max_alloc", PAGE, 512 * PAGE, + CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, + "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", @@ -1717,7 +1717,7 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc, + if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { return true; diff --git a/src/pa.c b/src/pa.c index da64b82..abe3f00 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,10 +49,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, - size_t sec_alloc_max, size_t sec_bytes_max) { +pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, alloc_max)) { + &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 90ec89e..924795f 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,9 +37,12 @@ create_test_data() { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); + hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT; + opts.slab_max_alloc = ALLOC_MAX; + err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - ALLOC_MAX); + &opts); assert_false(err, ""); return (hpa_shard_t *)test_data; -- cgit v0.12 From 4790db15ed2bc751f1b96404358a42bd50c8a461 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 13:52:29 -0800 Subject: HPA: make the 
hugification threshold configurable. --- include/jemalloc/internal/hpa_opts.h | 9 ++++++++- src/ctl.c | 5 +++++ src/hpa.c | 8 +++++--- src/jemalloc.c | 23 +++++++++++++++++++++++ src/stats.c | 1 + 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 95e86b4..bce0de2 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -15,11 +15,18 @@ struct hpa_shard_opts_s { * any allocation request. */ size_t slab_max_alloc; + /* + * When the number of active bytes in a hugepage is >= + * hugification_threshold, we force hugify it. + */ + size_t hugification_threshold; }; #define HPA_SHARD_OPTS_DEFAULT { \ /* slab_max_alloc */ \ - 64 * 1024 \ + 64 * 1024, \ + /* hugification_threshold */ \ + HUGEPAGE * 95 / 100, \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 195a46e..5096162 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -94,6 +94,7 @@ CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) +CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -396,6 +397,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_hugification_threshold"), + CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2091,6 +2094,8 @@ CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_hugification_threshold, + opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index dd9be5a..00fb279 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -137,10 +137,12 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* - * For now, just use a static check; hugify a page if it's <= 5% - * inactive. Eventually, this should be a malloc conf option. + * Note that this needs to be >= rather than just >, because of the + * important special case in which the hugification threshold is exactly + * HUGEPAGE. */ - return hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; + return hpdata_nactive_get(ps) * PAGE + >= shard->opts.hugification_threshold; } static bool diff --git a/src/jemalloc.c b/src/jemalloc.c index d1b09dd..cd40262 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1414,6 +1414,29 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); + /* + * Accept either a ratio-based or an exact hugification + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, + "hpa_hugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_hugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.hugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 8e29656..27fe5b7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1464,6 +1464,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") + OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") -- cgit v0.12 From 32dd15379696429dc1807c3c05fe125428a6faac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 14:42:05 -0800 Subject: HPA: Make dehugification threshold configurable. --- include/jemalloc/internal/hpa_opts.h | 7 +++++++ src/ctl.c | 5 +++++ src/hpa.c | 4 +++- src/jemalloc.c | 21 +++++++++++++++++++++ src/stats.c | 1 + 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index bce0de2..0ed1c41 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -20,6 +20,11 @@ struct hpa_shard_opts_s { * hugification_threshold, we force hugify it. */ size_t hugification_threshold; + /* + * When the number of dirty bytes in a hugepage is >= + * dehugification_threshold, we force dehugify it. 
+ */ + size_t dehugification_threshold; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -27,6 +32,8 @@ struct hpa_shard_opts_s { 64 * 1024, \ /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ + /* dehugification_threshold */ \ + HUGEPAGE * 20 / 100 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 5096162..ba667b5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,7 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) +CTL_PROTO(opt_hpa_dehugification_threshold) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -399,6 +400,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, + {NAME("hpa_dehugification_threshold"), + CTL(opt_hpa_dehugification_threshold)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2096,6 +2099,8 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) +CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, + opt_hpa_opts.dehugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index 00fb279..0e704b8 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -191,7 +191,9 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { * active pages; i.e. 4/5s of hugepage pages must be active. */ if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) - || hpdata_ndirty_get(ps) > HUGEPAGE_PAGES / 5) { + || (hpdata_ndirty_get(ps) != 0 + && hpdata_ndirty_get(ps) * PAGE + >= shard->opts.dehugification_threshold)) { hpdata_purge_allowed_set(ps, true); } if (hpa_good_hugification_candidate(shard, ps) diff --git a/src/jemalloc.c b/src/jemalloc.c index cd40262..fe8e09e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1437,6 +1437,27 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } + /* And the same for the dehugification_threhsold. 
*/ + CONF_HANDLE_SIZE_T( + opt_hpa_opts.dehugification_threshold, + "hpa_dehugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_dehugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.dehugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 27fe5b7..7a0f20b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1465,6 +1465,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") + OPT_WRITE_SIZE_T("hpa_dehugification_threshold") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") -- cgit v0.12 From 79f81a3732c434e9b648561bf8ab6ab6bf74385a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 15:55:17 -0800 Subject: HPA: Make dirty_mult configurable. --- include/jemalloc/internal/hpa_opts.h | 11 ++++++++++- src/ctl.c | 7 +++++++ src/hpa.c | 7 ++++++- src/jemalloc.c | 18 ++++++++++++++++++ src/stats.c | 21 ++++++++++++++++++++- 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 0ed1c41..5ff0072 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_HPA_OPTS_H #define JEMALLOC_INTERNAL_HPA_OPTS_H +#include "jemalloc/internal/fxp.h" + /* * This file is morally part of hpa.h, but is split out for header-ordering * reasons. @@ -25,6 +27,11 @@ struct hpa_shard_opts_s { * dehugification_threshold, we force dehugify it. */ size_t dehugification_threshold; + /* + * The HPA purges whenever the number of pages exceeds dirty_mult * + * active_pages. This may be set to (fxp_t)-1 to disable purging. 
+ */ + fxp_t dirty_mult; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -33,7 +40,9 @@ struct hpa_shard_opts_s { /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ /* dehugification_threshold */ \ - HUGEPAGE * 20 / 100 \ + HUGEPAGE * 20 / 100, \ + /* dirty_mult */ \ + FXP_INIT_PERCENT(25) \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index ba667b5..1c5e32b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,6 +96,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_dehugification_threshold) +CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -402,6 +403,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_dehugification_threshold"), CTL(opt_hpa_dehugification_threshold)}, + {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2101,6 +2103,11 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, opt_hpa_opts.dehugification_threshold, size_t) +/* + * This will have to change before we publicly document this option; fxp_t and + * its representation are internal implementation details. + */ +CTL_RO_NL_GEN(opt_hpa_dirty_mult, opt_hpa_opts.dirty_mult, fxp_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index 0e704b8..3c706cb 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -147,13 +147,18 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { static bool hpa_should_purge(hpa_shard_t *shard) { + if (shard->opts.dirty_mult == (fxp_t)-1) { + return false; + } size_t adjusted_ndirty = psset_ndirty(&shard->psset) - shard->npending_purge; /* * Another simple static check; purge whenever dirty exceeds 25% of * active. 
*/ - return adjusted_ndirty > psset_nactive(&shard->psset) / 4; + size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset), + shard->opts.dirty_mult); + return adjusted_ndirty > max_ndirty; } static void diff --git a/src/jemalloc.c b/src/jemalloc.c index fe8e09e..c2c75fa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1458,6 +1458,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } + if (CONF_MATCH("hpa_dirty_mult")) { + if (CONF_MATCH_VALUE("-1")) { + opt_hpa_opts.dirty_mult = (fxp_t)-1; + CONF_CONTINUE; + } + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.dirty_mult = ratio; + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 7a0f20b..1a7e6e4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/prof_stats.h" @@ -1375,13 +1376,14 @@ stats_general_print(emitter_t *emitter) { uint64_t u64v; int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, i64sz, ssz, sssz, cpsz; + size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); + u32sz = sizeof(uint32_t); i64sz = sizeof(int64_t); CTL_GET("version", &cpv, const char *); @@ -1466,6 +1468,23 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_SIZE_T("hpa_dehugification_threshold") + if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) + == 0) { + /* + * We cheat a little and "know" the secret meaning of this + * representation. + */ + if (u32v == (uint32_t)-1) { + emitter_kv(emitter, "hpa_dirty_mult", + "opt.hpa_dirty_mult", emitter_type_string, "-1"); + } else { + char buf[FXP_BUF_SIZE]; + fxp_print(u32v, buf); + const char *bufp = buf; + emitter_kv(emitter, "hpa_dirty_mult", + "opt.hpa_dirty_mult", emitter_type_string, &bufp); + } + } OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") -- cgit v0.12 From edbfe6912c1b7e8b561dfee1b058425de6c06285 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 08:49:34 -0800 Subject: Inline malloc fastpath into operator new. This saves a small but non-negligible amount of CPU in C++ programs. 
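The shape of the refactor, reduced to a toy sketch (hypothetical names; the real helper is imalloc_fastpath() in jemalloc_internal_inlines_c.h): the fast path becomes an always-inline function parameterized on a fallback allocator, so a NULL-returning malloc() and a throwing operator new can share one inlined fast path while tail-calling different slow paths.

    #include <stdlib.h>

    /* Slow path for a malloc-like caller: may return NULL. */
    static void *
    toy_slow_malloc(size_t size) {
        return malloc(size);
    }

    /* Slow path for an operator-new-like caller: never returns NULL. */
    static void *
    toy_slow_new(size_t size) {
        void *p = malloc(size);
        if (p == NULL) {
            abort(); /* a real operator new would throw std::bad_alloc */
        }
        return p;
    }

    /* Shared fast path; on any miss it tail-calls the caller's fallback. */
    static inline void *
    toy_fastpath(size_t size, void *(*fallback)(size_t)) {
        /* A real fast path would try the thread cache here first. */
        return fallback(size);
    }

    void *
    toy_malloc(size_t size) {
        return toy_fastpath(size, toy_slow_malloc);
    }

    void *
    toy_operator_new(size_t size) {
        return toy_fastpath(size, toy_slow_new);
    }

Keeping the slow path behind a tail call also avoids caller-saved register spills on the hot path, per the comment in the new header.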
--- .../jemalloc/internal/jemalloc_internal_externs.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 118 +++++++++++++++++++++ .../jemalloc/internal/jemalloc_internal_types.h | 8 ++ src/jemalloc.c | 116 +------------------- src/jemalloc_cpp.cpp | 15 ++- 5 files changed, 141 insertions(+), 119 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 166c91d..8054ad9 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -30,6 +30,7 @@ extern bool opt_xmalloc; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; +extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; @@ -64,7 +65,7 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -bool malloc_initialized(void); void je_sdallocx_noflags(void *ptr, size_t size); +void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 0a5ffba..b0868b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -3,7 +3,9 @@ #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" /* @@ -219,4 +221,120 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, newsize); } +JEMALLOC_ALWAYS_INLINE void +fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, + cache_bin_t *bin, void *ret) { + thread_allocated_set(tsd, allocated_after); + if (config_stats) { + bin->tstats.nrequests++; + } + + LOG("core.malloc.exit", "result: %p", ret); +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_initialized(void) { + return (malloc_init_state == malloc_init_initialized); +} + +/* + * malloc() fastpath. Included here so that we can inline it into operator new; + * function call overhead there is non-negligible as a fraction of total CPU in + * allocation-heavy C++ programs. We take the fallback alloc to allow malloc + * (which can return NULL) to differ in its behavior from operator new (which + * can't). It matches the signature of malloc / operator new so that we can + * tail-call the fallback allocator, allowing us to avoid setting up the call + * frame in the common case. + * + * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit + * tcache. If either of these is false, we tail-call to the slowpath, + * malloc_default(). Tail-calling is used to avoid any caller-saved + * registers. + * + * fastpath supports ticker and profiling, both of which will also + * tail-call to the slowpath if they fire. 
+ */ +JEMALLOC_ALWAYS_INLINE void * +imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { + LOG("core.malloc.entry", "size: %zu", size); + if (tsd_get_allocates() && unlikely(!malloc_initialized())) { + return fallback_alloc(size); + } + + tsd_t *tsd = tsd_get(false); + if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { + return fallback_alloc(size); + } + /* + * The code below till the branch checking the next_event threshold may + * execute before malloc_init(), in which case the threshold is 0 to + * trigger slow path and initialization. + * + * Note that when uninitialized, only the fast-path variants of the sz / + * tsd facilities may be called. + */ + szind_t ind; + /* + * The thread_allocated counter in tsd serves as a general purpose + * accumulator for bytes of allocation to trigger different types of + * events. usize is always needed to advance thread_allocated, though + * it's not always needed in the core allocation logic. + */ + size_t usize; + sz_size2index_usize_fastpath(size, &ind, &usize); + /* Fast path relies on size being a bin. */ + assert(ind < SC_NBINS); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && + (size <= SC_SMALL_MAXCLASS)); + + uint64_t allocated, threshold; + te_malloc_fastpath_ctx(tsd, &allocated, &threshold); + uint64_t allocated_after = allocated + usize; + /* + * The ind and usize might be uninitialized (or partially) before + * malloc_init(). The assertions check for: 1) full correctness (usize + * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) + * when !initialized. + */ + if (!malloc_initialized()) { + assert(threshold == 0); + } else { + assert(ind == sz_size2index(size)); + assert(usize > 0 && usize == sz_index2size(ind)); + } + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. + */ + if (unlikely(allocated_after >= threshold)) { + return fallback_alloc(size); + } + assert(tsd_fast(tsd)); + + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + cache_bin_t *bin = &tcache->bins[ind]; + bool tcache_success; + void *ret; + + /* + * We split up the code this way so that redundant low-water + * computation doesn't happen on the (more common) case in which we + * don't touch the low water mark. The compiler won't do this + * duplication on its own. + */ + ret = cache_bin_alloc_easy(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + ret = cache_bin_alloc(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + + return fallback_alloc(size); +} + #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 1ce0f3a..61c1f31 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -20,6 +20,14 @@ typedef enum zero_realloc_action_e zero_realloc_action_t; /* Signature of write callback. */ typedef void (write_cb_t)(void *, const char *); +enum malloc_init_e { + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. 
*/ +}; +typedef enum malloc_init_e malloc_init_t; + /* * Flags bits: * diff --git a/src/jemalloc.c b/src/jemalloc.c index c2c75fa..dc3c98b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -169,13 +169,7 @@ static arena_t *a0; /* arenas[0]. */ unsigned narenas_auto; unsigned manual_arena_base; -typedef enum { - malloc_init_uninitialized = 3, - malloc_init_a0_initialized = 2, - malloc_init_recursible = 1, - malloc_init_initialized = 0 /* Common case --> jnz. */ -} malloc_init_t; -static malloc_init_t malloc_init_state = malloc_init_uninitialized; +malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ bool malloc_slow = true; @@ -280,11 +274,6 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. */ -bool -malloc_initialized(void) { - return (malloc_init_state == malloc_init_initialized); -} - JEMALLOC_ALWAYS_INLINE bool malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) { @@ -2597,112 +2586,11 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. */ -JEMALLOC_ALWAYS_INLINE void -fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, - cache_bin_t *bin, void *ret) { - thread_allocated_set(tsd, allocated_after); - if (config_stats) { - bin->tstats.nrequests++; - } - - LOG("core.malloc.exit", "result: %p", ret); -} - -/* - * malloc() fastpath. - * - * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit - * tcache. If either of these is false, we tail-call to the slowpath, - * malloc_default(). Tail-calling is used to avoid any caller-saved - * registers. - * - * fastpath supports ticker and profiling, both of which will also - * tail-call to the slowpath if they fire. - */ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { - LOG("core.malloc.entry", "size: %zu", size); - - if (tsd_get_allocates() && unlikely(!malloc_initialized())) { - return malloc_default(size); - } - - tsd_t *tsd = tsd_get(false); - if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { - return malloc_default(size); - } - /* - * The code below till the branch checking the next_event threshold may - * execute before malloc_init(), in which case the threshold is 0 to - * trigger slow path and initialization. - * - * Note that when uninitialized, only the fast-path variants of the sz / - * tsd facilities may be called. - */ - szind_t ind; - /* - * The thread_allocated counter in tsd serves as a general purpose - * accumulator for bytes of allocation to trigger different types of - * events. usize is always needed to advance thread_allocated, though - * it's not always needed in the core allocation logic. - */ - size_t usize; - sz_size2index_usize_fastpath(size, &ind, &usize); - /* Fast path relies on size being a bin. */ - assert(ind < SC_NBINS); - assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && - (size <= SC_SMALL_MAXCLASS)); - - uint64_t allocated, threshold; - te_malloc_fastpath_ctx(tsd, &allocated, &threshold); - uint64_t allocated_after = allocated + usize; - /* - * The ind and usize might be uninitialized (or partially) before - * malloc_init(). The assertions check for: 1) full correctness (usize - * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) - * when !initialized. 
- */ - if (!malloc_initialized()) { - assert(threshold == 0); - } else { - assert(ind == sz_size2index(size)); - assert(usize > 0 && usize == sz_index2size(ind)); - } - /* - * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. - */ - if (unlikely(allocated_after >= threshold)) { - return malloc_default(size); - } - assert(tsd_fast(tsd)); - - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ true); - cache_bin_t *bin = &tcache->bins[ind]; - bool tcache_success; - void *ret; - - /* - * We split up the code this way so that redundant low-water - * computation doesn't happen on the (more common) case in which we - * don't touch the low water mark. The compiler won't do this - * duplication on its own. - */ - ret = cache_bin_alloc_easy(bin, &tcache_success); - if (tcache_success) { - fastpath_success_finish(tsd, allocated_after, bin, ret); - return ret; - } - ret = cache_bin_alloc(bin, &tcache_success); - if (tcache_success) { - fastpath_success_finish(tsd, allocated_after, bin, ret); - return ret; - } - - return malloc_default(size); + return imalloc_fastpath(size, &malloc_default); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 6959b27..47ba92a 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -86,10 +86,10 @@ handleOOM(std::size_t size, bool nothrow) { } template -JEMALLOC_ALWAYS_INLINE -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); +JEMALLOC_NOINLINE +static void * +fallback_impl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { return ptr; } @@ -97,6 +97,13 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } +template +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + return imalloc_fastpath(size, &fallback_impl); +} + void * operator new(std::size_t size) { return newImpl(size); -- cgit v0.12 From f3b2668b3219e108348b9a28d00c4f805a1b5ab6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Feb 2021 16:47:09 -0800 Subject: Report the offending pointer on sized dealloc bug detection. 
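For context, this is the class of application bug the check reports (illustrative only; it requires a build with the sized-deallocation safety checks enabled, e.g. via --enable-opt-safety-checks): the size passed to a sized deallocation maps to a different size class than the original allocation, and the failure message now also carries the offending address.

    #include <jemalloc/jemalloc.h>

    int
    main(void) {
        void *p = mallocx(100, 0); /* lands in a small size class */
        /* Bug: frees with a size that maps to a different size class. */
        sdallocx(p, 200, 0);
        return 0;
    }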
--- include/jemalloc/internal/arena_inlines_b.h | 9 +++++---- include/jemalloc/internal/safety_check.h | 2 +- src/jemalloc.c | 5 +++-- src/safety_check.c | 8 ++++---- src/tcache.c | 21 ++++++++++++++++++--- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 13e6eb5..5df8e85 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -211,7 +211,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { if (!config_opt_safety_checks) { return false; } @@ -229,7 +229,8 @@ large_dalloc_safety_checks(edata_t *edata, szind_t szind) { return true; } if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true); + safety_check_fail_sized_dealloc(/* current_dealloc */ true, + ptr); return true; } @@ -243,7 +244,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, szind)) { + if (large_dalloc_safety_checks(edata, ptr, szind)) { /* See the comment in isfree. */ return; } @@ -287,7 +288,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, szind)) { + if (large_dalloc_safety_checks(edata, ptr, szind)) { /* See the comment in isfree. */ return; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index a7a4433..b27ac08 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -void safety_check_fail_sized_dealloc(bool current_dealloc); +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. */ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index dc3c98b..9d03880 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2751,7 +2751,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* current_dealloc */ true); + /* current_dealloc */ true, ptr); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { @@ -2801,7 +2801,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). */ if (alloc_ctx.szind != sz_size2index(usize)) { - safety_check_fail_sized_dealloc(true); + safety_check_fail_sized_dealloc(true, + ptr); } } } else { diff --git a/src/safety_check.c b/src/safety_check.c index c692835..0dff934 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,14 +3,14 @@ static void (*safety_check_abort)(const char *message); -void safety_check_fail_sized_dealloc(bool current_dealloc) { +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr) { char *src = current_dealloc ? 
"the current pointer being freed" : "in thread cache, possibly from previous deallocations"; safety_check_fail(": size mismatch detected, likely caused by" - " application sized deallocation bugs (source: %s). Suggest building" - "with --enable-debug or address sanitizer for debugging. Abort.\n", - src); + " application sized deallocation bugs (source address: %p, %s). " + "Suggest building with --enable-debug or address sanitizer for " + "debugging. Abort.\n", ptr, src); } void safety_check_set_abort(void (*abort_fn)(const char *)) { diff --git a/src/tcache.c b/src/tcache.c index 7c4047f..3489e72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -250,6 +250,20 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); } +JEMALLOC_NOINLINE static void +tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, + size_t nptrs, emap_batch_lookup_result_t *edatas) { + bool found_mismatch = false; + for (size_t i = 0; i < nptrs; i++) { + if (edata_szind_get(edatas[i].edata) != szind) { + found_mismatch = true; + safety_check_fail_sized_dealloc(false, + tcache_bin_flush_ptr_getter(arr, i)); + } + } + assert(found_mismatch); +} + static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { @@ -264,8 +278,8 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, &tcache_bin_flush_ptr_getter, (void *)arr, &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); - if (config_opt_safety_checks && szind_sum != 0) { - safety_check_fail_sized_dealloc(false); + if (config_opt_safety_checks && unlikely(szind_sum != 0)) { + tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); } } @@ -435,7 +449,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, dalloc_count++; } } else { - if (large_dalloc_safety_checks(edata, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + binind)) { /* See the comment in isfree. */ continue; } -- cgit v0.12 From 041145c272711b55f91aa42128b108674a12fd91 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Feb 2021 17:26:45 -0800 Subject: Report the correct and wrong sizes on sized dealloc bug detection. 
--- include/jemalloc/internal/arena_inlines_b.h | 7 ++++--- include/jemalloc/internal/safety_check.h | 3 ++- src/jemalloc.c | 14 ++++++++++---- src/safety_check.c | 12 +++++++----- src/tcache.c | 10 +++++++--- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5df8e85..5410b16 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -228,9 +228,10 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { (uintptr_t)edata_addr_get(edata)); return true; } - if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true, - ptr); + size_t input_size = sz_index2size(szind); + if (unlikely(input_size != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, + /* true_size */ edata_usize_get(edata), input_size); return true; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index b27ac08..f10c68e 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,7 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr); +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, + size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. */ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9d03880..3bccac9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2751,7 +2751,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr); + /* current_dealloc */ true, ptr, + /* true_size */ sz_size2index(dbg_ctx.szind), + /* input_size */ sz_size2index(alloc_ctx->szind)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { @@ -2800,9 +2802,13 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). */ - if (alloc_ctx.szind != sz_size2index(usize)) { - safety_check_fail_sized_dealloc(true, - ptr); + if (unlikely(alloc_ctx.szind != + sz_size2index(usize))) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ sz_index2size( + alloc_ctx.szind), + /* input_size */ usize); } } } else { diff --git a/src/safety_check.c b/src/safety_check.c index 0dff934..9747afe 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,14 +3,16 @@ static void (*safety_check_abort)(const char *message); -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr) { +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, + size_t true_size, size_t input_size) { char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; - safety_check_fail(": size mismatch detected, likely caused by" - " application sized deallocation bugs (source address: %p, %s). " - "Suggest building with --enable-debug or address sanitizer for " - "debugging. 
Abort.\n", ptr, src); + safety_check_fail(": size mismatch detected (true size %zu " + "vs input size %zu), likely caused by application sized " + "dealloction bugs (source address: %p, %s). Suggest building with " + "--enable-debug or address sanitizer for debugging. Abort.\n", + true_size, input_size, ptr, src); } void safety_check_set_abort(void (*abort_fn)(const char *)) { diff --git a/src/tcache.c b/src/tcache.c index 3489e72..39a4ea6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -255,10 +255,14 @@ tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, size_t nptrs, emap_batch_lookup_result_t *edatas) { bool found_mismatch = false; for (size_t i = 0; i < nptrs; i++) { - if (edata_szind_get(edatas[i].edata) != szind) { + szind_t true_szind = edata_szind_get(edatas[i].edata); + if (true_szind != szind) { found_mismatch = true; - safety_check_fail_sized_dealloc(false, - tcache_bin_flush_ptr_getter(arr, i)); + safety_check_fail_sized_dealloc( + /* current_dealloc */ false, + /* ptr */ tcache_bin_flush_ptr_getter(arr, i), + /* true_size */ sz_index2size(true_szind), + /* input_size */ sz_index2size(szind)); } } assert(found_mismatch); -- cgit v0.12 From 8c5e5f50a29d6ca636bf7394d93be1814de6d74c Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 10 Feb 2021 11:08:18 -0500 Subject: Fix stats for "tcache_max" (was "lg_tcache_max") This opt was changed here: c8209150f9d219a137412b06431c9d52839c7272 and looks like this got missed. Also update the write type to be unsigned. --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 1a7e6e4..20ff299 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1498,7 +1498,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("tcache") - OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") OPT_WRITE_UNSIGNED("tcache_nslots_small_max") OPT_WRITE_UNSIGNED("tcache_nslots_large") -- cgit v0.12 From a11be50332c5cdae7ce74d8e0551e7f3143630b8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 9 Feb 2021 22:24:35 -0800 Subject: Implement opt.cache_oblivious. Keep config.cache_oblivious for now to remain backward-compatible. --- INSTALL.md | 14 +++++++------- doc/jemalloc.xml.in | 16 ++++++++++++++++ include/jemalloc/internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/prof_inlines.h | 2 +- include/jemalloc/internal/sz.h | 14 ++++++-------- src/ctl.c | 3 +++ src/jemalloc.c | 17 ++++++++++------- src/large.c | 3 ++- src/stats.c | 1 + src/sz.c | 6 ++++-- test/unit/extent_quantize.c | 2 +- test/unit/mallctl.c | 1 + 12 files changed, 53 insertions(+), 27 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index eb55acf..adc72b8 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -185,13 +185,13 @@ any of the following arguments (not a definitive list) to 'configure': * `--disable-cache-oblivious` - Disable cache-oblivious large allocation alignment for large allocation - requests with no alignment constraints. If this feature is disabled, all - large allocations are page-aligned as an implementation artifact, which can - severely harm CPU cache utilization. However, the cache-oblivious layout - comes at the cost of one extra page per large allocation, which in the - most extreme case increases physical memory usage for the 16 KiB size class - to 20 KiB. 
+ Disable cache-oblivious large allocation alignment by default, for large + allocation requests with no alignment constraints. If this feature is + disabled, all large allocations are page-aligned as an implementation + artifact, which can severely harm CPU cache utilization. However, the + cache-oblivious layout comes at the cost of one extra page per large + allocation, which in the most extreme case increases physical memory usage + for the 16 KiB size class to 20 KiB. * `--disable-syscall` diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4b93c5a..018170c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -936,6 +936,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.cache_oblivious + (bool) + r- + + Enable / Disable cache-oblivious large allocation + alignment, for large requests with no alignment constraints. If this + feature is disabled, all large allocations are page-aligned as an + implementation artifact, which can severely harm CPU cache utilization. + However, the cache-oblivious layout comes at the cost of one extra page + per large allocation, which in the most extreme case increases physical + memory usage for the 16 KiB size class to 20 KiB. This option is enabled + by default. + + opt.metadata_thp diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8054ad9..da69355 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -33,6 +33,7 @@ extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; +extern bool opt_cache_oblivious; /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index c76d2ae..7884e9a 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -223,7 +223,7 @@ prof_sample_align(size_t orig_align) { * w/o metadata lookup. */ assert(opt_prof); - return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : orig_align; } diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 91940cc..f2be613 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -45,15 +45,13 @@ extern size_t sz_index2size_tab[SC_NSIZES]; */ extern uint8_t sz_size2index_tab[]; -static const size_t sz_large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; +/* + * Padding for large allocations: PAGE when opt_cache_oblivious == true (to + * enable cache index randomization); 0 otherwise. 
+ */ +extern size_t sz_large_pad; -extern void sz_boot(const sc_data_t *sc_data); +extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { diff --git a/src/ctl.c b/src/ctl.c index 1c5e32b..4fc3ad0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) @@ -395,6 +396,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, @@ -2095,6 +2097,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3bccac9..125682b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -102,6 +102,14 @@ bool opt_trust_madvise = #endif ; +bool opt_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1697,7 +1705,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); - sz_boot(&sc_data); + sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { @@ -2790,12 +2798,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { * usize can be trusted to determine szind and slab. */ alloc_ctx.szind = sz_size2index(usize); - if (config_cache_oblivious) { - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); - } else { - /* Non page aligned must be slab allocated. */ - alloc_ctx.slab = true; - } + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); diff --git a/src/large.c b/src/large.c index f23839f..bd29e5c 100644 --- a/src/large.c +++ b/src/large.c @@ -95,7 +95,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } if (zero) { - if (config_cache_oblivious) { + if (opt_cache_oblivious) { + assert(sz_large_pad == PAGE); /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. 
diff --git a/src/stats.c b/src/stats.c index 20ff299..7a0526c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1458,6 +1458,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") diff --git a/src/sz.c b/src/sz.c index 7734f39..d3115dd 100644 --- a/src/sz.c +++ b/src/sz.c @@ -1,9 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) size_t sz_pind2sz_tab[SC_NPSIZES+1]; - +size_t sz_large_pad; size_t sz_psz_quantize_floor(size_t size) { @@ -105,7 +106,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { } void -sz_boot(const sc_data_t *sc_data) { +sz_boot(const sc_data_t *sc_data, bool cache_oblivious) { + sz_large_pad = cache_oblivious ? PAGE : 0; sz_boot_pind2sz_tab(sc_data); sz_boot_index2size_tab(sc_data); sz_boot_size2index_tab(sc_data); diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 27a4a7e..e6bbd53 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -47,7 +47,7 @@ TEST_BEGIN(test_large_extent_size) { */ sz = sizeof(bool); - expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6f5a8f1..1fb7466 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, cache_oblivious, always); TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); -- cgit v0.12 From cde7097ecaba08b50c5594137175e0e1e567f4c4 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Mon, 15 Feb 2021 20:12:23 -0500 Subject: Update INSTALL.md to mention 'autoconf' 'autoconf' needs to be installed for './autogen.sh' to work. --- INSTALL.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index adc72b8..14dacfa 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -12,7 +12,9 @@ that might work is: make make install -Note that documentation is built by the default target only when xsltproc is +Notes: + - "autoconf" needs to be installed + - Documentation is built by the default target only when xsltproc is available. Build will warn but not stop if the dependency is missing. -- cgit v0.12 From 4b8870c7dbfaeea7136a8e0b9f93a2ad85d31a55 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 18:22:02 -0800 Subject: SEC: Fix a comment typo. --- include/jemalloc/internal/sec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 7c1465e..6bf5687 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -80,7 +80,7 @@ struct sec_s { size_t alloc_max; /* * Exceeding this amount of cached extents in a shard causes *all* of - * the shards in that bin to be flushed. + * the bins in that shard to be flushed. 
*/ size_t bytes_max; -- cgit v0.12 From f47b4c2cd8ed3e843b987ee972d187df45391b69 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 18:40:27 -0800 Subject: PAI/SEC: Add a dalloc_batch function. This lets the SEC flush all of its items in a single call, rather than flushing everything at once. --- Makefile.in | 1 + include/jemalloc/internal/pai.h | 14 +++++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/hpa.c | 1 + src/pac.c | 1 + src/pai.c | 13 +++++++++ src/sec.c | 8 ++--- test/unit/sec.c | 34 ++++++++++++++++++---- 11 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 src/pai.c diff --git a/Makefile.in b/Makefile.in index 40c4144..11a553b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,6 +132,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ + $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 45edd69..f7f3e07 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,6 +13,8 @@ struct pai_s { bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); }; /* @@ -42,4 +44,16 @@ pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { self->dalloc(tsdn, self, edata); } +static inline void +pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { + return self->dalloc_batch(tsdn, self, list); +} + +/* + * An implementation of batch deallocation that simply calls dalloc once for + * each item in the list. 
+ */ +void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); + #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a93511d..9ec953a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 06460e5..210204a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 916460a..171b95f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 06460e5..210204a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files diff --git a/src/hpa.c b/src/hpa.c index 3c706cb..013cd7e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -91,6 +91,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; + shard->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } diff --git a/src/pac.c b/src/pac.c index 8064615..0ba0f2f 100644 --- a/src/pac.c +++ b/src/pac.c @@ -94,6 +94,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; + pac->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } diff --git a/src/pai.c b/src/pai.c new file mode 100644 index 0000000..1035c85 --- /dev/null +++ b/src/pai.c @@ -0,0 +1,13 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + + +void +pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list) { + edata_t *edata; + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + pai_dalloc(tsdn, self, edata); + } +} diff --git a/src/sec.c b/src/sec.c index 262d813..41e75b9 100644 --- a/src/sec.c +++ b/src/sec.c @@ -46,6 +46,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, sec->pai.expand = &sec_expand; sec->pai.shrink = &sec_shrink; sec->pai.dalloc = &sec_dalloc; + sec->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } @@ -142,6 +143,7 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { for (pszind_t i = 0; i < SEC_NPSIZES; i++) { edata_list_active_concat(&to_flush, &shard->freelist[i]); } + /* * A better way to do this would be to add a batch dalloc function to * the pai_t. Practically, the current method turns into O(n) locks and @@ -149,11 +151,7 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * HPA) can straightforwardly do many deallocations in a single lock / * unlock pair. 
*/ - while (!edata_list_active_empty(&to_flush)) { - edata_t *e = edata_list_active_first(&to_flush); - edata_list_active_remove(&to_flush, e); - pai_dalloc(tsdn, sec->fallback, e); - } + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } static void diff --git a/test/unit/sec.c b/test/unit/sec.c index cb0c17d..7657537 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -8,6 +8,7 @@ struct pai_test_allocator_s { bool alloc_fail; size_t alloc_count; size_t dalloc_count; + size_t dalloc_batch_count; /* * We use a simple bump allocator as the implementation. This isn't * *really* correct, since we may allow expansion into a subsequent @@ -64,11 +65,25 @@ pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { free(edata); } +static void +pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + + edata_t *edata; + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + ta->dalloc_batch_count++; + free(edata); + } +} + static inline void pai_test_allocator_init(pai_test_allocator_t *ta) { ta->alloc_fail = false; ta->alloc_count = 0; ta->dalloc_count = 0; + ta->dalloc_batch_count = 0; /* Just don't start the edata at 0. */ ta->next_ptr = 10 * PAGE; ta->expand_count = 0; @@ -79,6 +94,7 @@ pai_test_allocator_init(pai_test_allocator_t *ta) { ta->pai.expand = &pai_test_allocator_expand; ta->pai.shrink = &pai_test_allocator_shrink; ta->pai.dalloc = &pai_test_allocator_dalloc; + ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch; } TEST_BEGIN(test_reuse) { @@ -190,8 +206,10 @@ TEST_BEGIN(test_auto_flush) { pai_dalloc(tsdn, &sec.pai, extra_alloc); expect_zu_eq(NALLOCS + 1, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(NALLOCS + 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); } TEST_END @@ -233,8 +251,10 @@ do_disable_flush_test(bool is_disable) { expect_zu_eq(NALLOCS, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); /* * If we free into a disabled SEC, it should forward to the fallback. @@ -244,8 +264,10 @@ do_disable_flush_test(bool is_disable) { expect_zu_eq(NALLOCS, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); } TEST_BEGIN(test_disable) { -- cgit v0.12 From 1944ebbe7f079e79fbeda836dc0333f7a049ac26 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 19:43:08 -0800 Subject: HPA: Implement batch deallocation. This saves O(n) mutex locks/unlocks during SEC flush. 
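The saving comes from hoisting the mutex acquisition out of the per-extent loop: per-extent work that does not need the shard lock is done up front, and the remaining work runs under one lock/unlock pair for the whole list. Below is a minimal, self-contained sketch of that pattern; the names (shard_t, item_t and the two helpers) are illustrative stand-ins, not the actual jemalloc internals shown in the diff.

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-ins for edata_t and the HPA shard. */
typedef struct item_s item_t;
struct item_s {
	item_t *next;
};

typedef struct shard_s {
	pthread_mutex_t mtx;
	size_t ndalloc;		/* protected by mtx */
} shard_t;

/* Per-item work that is safe without the shard mutex. */
static void
dalloc_prepare_unlocked(shard_t *shard, item_t *it) {
	(void)shard;
	(void)it;
}

/* Per-item work that must hold the shard mutex. */
static void
dalloc_locked(shard_t *shard, item_t *it) {
	(void)it;
	shard->ndalloc++;
}

/* One lock/unlock pair for the whole batch instead of one per item. */
static void
dalloc_batch(shard_t *shard, item_t *head) {
	for (item_t *it = head; it != NULL; it = it->next) {
		dalloc_prepare_unlocked(shard, it);
	}
	pthread_mutex_lock(&shard->mtx);
	for (item_t *it = head; it != NULL; it = it->next) {
		dalloc_locked(shard, it);
	}
	pthread_mutex_unlock(&shard->mtx);
}

int
main(void) {
	shard_t shard = {PTHREAD_MUTEX_INITIALIZER, 0};
	item_t items[3] = {{&items[1]}, {&items[2]}, {NULL}};
	dalloc_batch(&shard, &items[0]);
	printf("deallocated %zu items with one lock/unlock\n", shard.ndalloc);
	return 0;
}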
--- include/jemalloc/internal/pai.h | 1 + src/hpa.c | 63 ++++++++++++++++++++++++++++++----------- src/sec.c | 7 ----- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index f7f3e07..73f5433 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,6 +13,7 @@ struct pai_s { bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); }; diff --git a/src/hpa.c b/src/hpa.c index 013cd7e..fa58bb7 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -15,6 +15,8 @@ static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); bool hpa_supported() { @@ -91,7 +93,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->pai.dalloc_batch = &pai_dalloc_batch_default; + shard->pai.dalloc_batch = &hpa_dalloc_batch; return false; } @@ -663,11 +665,8 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - hpa_shard_t *shard = hpa_from_pai(self); - - edata_addr_set(edata, edata_base_get(edata)); - edata_zeroed_set(edata, false); +hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); @@ -677,32 +676,62 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); - hpdata_t *ps = edata_ps_get(edata); - /* Currently, all edatas come from pageslabs. */ - assert(ps != NULL); + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); emap_deregister_boundary(tsdn, shard->emap, edata); - /* - * Note that the shard mutex protects ps's metadata too; it wouldn't be - * correct to try to read most information out of it without the lock. - */ - malloc_mutex_lock(tsdn, &shard->mtx); +} + +static void +hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); /* * Release the metadata early, to avoid having to remember to do it - * while we're also doing tricky purging logic. + * while we're also doing tricky purging logic. First, we need to grab + * a few bits of metadata from it. + * + * Note that the shard mutex protects ps's metadata too; it wouldn't be + * correct to try to read most information out of it without the lock. */ + hpdata_t *ps = edata_ps_get(edata); + /* Currently, all edatas come from pageslabs. 
*/ + assert(ps != NULL); void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); - hpa_do_deferred_work(tsdn, shard); +} + +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + hpa_shard_t *shard = hpa_from_pai(self); + + hpa_dalloc_prepare_unlocked(tsdn, shard, edata); + malloc_mutex_lock(tsdn, &shard->mtx); + hpa_dalloc_locked(tsdn, shard, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +static void +hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { + hpa_shard_t *shard = hpa_from_pai(self); + edata_t *edata; + ql_foreach(edata, &list->head, ql_link_active) { + hpa_dalloc_prepare_unlocked(tsdn, shard, edata); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + /* Now, remove from the list. */ + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + hpa_dalloc_locked(tsdn, shard, edata); + } malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/src/sec.c b/src/sec.c index 41e75b9..3a3a0b9 100644 --- a/src/sec.c +++ b/src/sec.c @@ -144,13 +144,6 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { edata_list_active_concat(&to_flush, &shard->freelist[i]); } - /* - * A better way to do this would be to add a batch dalloc function to - * the pai_t. Practically, the current method turns into O(n) locks and - * unlocks at the fallback allocator. But some implementations (e.g. - * HPA) can straightforwardly do many deallocations in a single lock / - * unlock pair. - */ pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } -- cgit v0.12 From bf448d7a5a4c2aecbda7ef11767a75829d9aaf77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 5 Jan 2021 15:52:25 -0800 Subject: SEC: Reduce lock hold times. Only flush a subset of extents during flushing, and drop the lock while doing so. --- include/jemalloc/internal/sec.h | 40 +++++++++++++++---- src/sec.c | 87 ++++++++++++++++++++++++++++++++++------- test/unit/sec.c | 7 +++- 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 6bf5687..815b4bb 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -8,13 +8,9 @@ * Small extent cache. * * This includes some utilities to cache small extents. We have a per-pszind - * bin with its own lock and edata heap (including only extents of that size). - * We don't try to do any coalescing of extents (since it would require - * cross-bin locks). As a result, we need to be careful about fragmentation. - * As a gesture in that direction, we limit the size of caches, apply first-fit - * within the bins, and, when flushing a bin, flush all of its extents rather - * than just those up to some threshold. When we allocate again, we'll get a - * chance to move to better ones. + * bin with its own list of extents of that size. We don't try to do any + * coalescing of extents (since it would in general require cross-shard locks or + * knowledge of the underlying PAI implementation). */ /* @@ -46,6 +42,19 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { dst->bytes += src->bytes; } +/* A collections of free extents, all of the same size. 
*/ +typedef struct sec_bin_s sec_bin_t; +struct sec_bin_s { + /* + * Number of bytes in this particular bin (as opposed to the + * sec_shard_t's bytes_cur. This isn't user visible or reported in + * stats; rather, it allows us to quickly determine the change in the + * centralized counter when flushing. + */ + size_t bytes_cur; + edata_list_active_t freelist; +}; + typedef struct sec_shard_s sec_shard_t; struct sec_shard_s { /* @@ -64,8 +73,11 @@ struct sec_shard_s { * hooks are installed. */ bool enabled; - edata_list_active_t freelist[SEC_NPSIZES]; + sec_bin_t bins[SEC_NPSIZES]; + /* Number of bytes in all bins in the shard. */ size_t bytes_cur; + /* The next pszind to flush in the flush-some pathways. */ + pszind_t to_flush_next; }; typedef struct sec_s sec_t; @@ -83,6 +95,18 @@ struct sec_s { * the bins in that shard to be flushed. */ size_t bytes_max; + /* + * The number of bytes (in all bins) we flush down to when we exceed + * bytes_cur. We want this to be less than bytes_cur, because + * otherwise we could get into situations where a shard undergoing + * net-deallocation keeps bytes_cur very near to bytes_max, so that + * most deallocations get immediately forwarded to the underlying PAI + * implementation, defeating the point of the SEC. + * + * Currently this is just set to bytes_max / 2, but eventually can be + * configurable. + */ + size_t bytes_after_flush; /* * We don't necessarily always use all the shards; requests are diff --git a/src/sec.c b/src/sec.c index 3a3a0b9..49b4104 100644 --- a/src/sec.c +++ b/src/sec.c @@ -11,7 +11,14 @@ static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); -bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, +static void +sec_bin_init(sec_bin_t *bin) { + bin->bytes_cur = 0; + edata_list_active_init(&bin->freelist); +} + +bool +sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, size_t bytes_max) { if (nshards > SEC_NSHARDS_MAX) { nshards = SEC_NSHARDS_MAX; @@ -25,9 +32,10 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, } shard->enabled = true; for (pszind_t j = 0; j < SEC_NPSIZES; j++) { - edata_list_active_init(&shard->freelist[j]); + sec_bin_init(&shard->bins[j]); } shard->bytes_cur = 0; + shard->to_flush_next = 0; } sec->fallback = fallback; sec->alloc_max = alloc_max; @@ -36,6 +44,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, } sec->bytes_max = bytes_max; + sec->bytes_after_flush = bytes_max / 2; sec->nshards = nshards; /* @@ -85,9 +94,12 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, if (!shard->enabled) { return NULL; } - edata_t *edata = edata_list_active_first(&shard->freelist[pszind]); + sec_bin_t *bin = &shard->bins[pszind]; + edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { - edata_list_active_remove(&shard->freelist[pszind], edata); + edata_list_active_remove(&bin->freelist, edata); + assert(edata_size_get(edata) <= bin->bytes_cur); + bin->bytes_cur -= edata_size_get(edata); assert(edata_size_get(edata) <= shard->bytes_cur); shard->bytes_cur -= edata_size_get(edata); } @@ -135,30 +147,75 @@ sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } static void -sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { +sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); 
shard->bytes_cur = 0; edata_list_active_t to_flush; edata_list_active_init(&to_flush); for (pszind_t i = 0; i < SEC_NPSIZES; i++) { - edata_list_active_concat(&to_flush, &shard->freelist[i]); + sec_bin_t *bin = &shard->bins[i]; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + + /* + * Ordinarily we would try to avoid doing the batch deallocation while + * holding the shard mutex, but the flush_all pathways only happen when + * we're disabling the HPA or resetting the arena, both of which are + * rare pathways. + */ + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); +} + +static void +sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + while (shard->bytes_cur > sec->bytes_after_flush) { + /* Pick a victim. */ + sec_bin_t *bin = &shard->bins[shard->to_flush_next]; + + /* Update our victim-picking state. */ + shard->to_flush_next++; + if (shard->to_flush_next == SEC_NPSIZES) { + shard->to_flush_next = 0; + } + + assert(shard->bytes_cur >= bin->bytes_cur); + if (bin->bytes_cur != 0) { + shard->bytes_cur -= bin->bytes_cur; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + /* + * Either bin->bytes_cur was 0, in which case we didn't touch + * the bin list but it should be empty anyways (or else we + * missed a bytes_cur update on a list modification), or it + * *was* 0 and we emptied it ourselves. Either way, it should + * be empty now. + */ + assert(edata_list_active_empty(&bin->freelist)); } + malloc_mutex_unlock(tsdn, &shard->mtx); pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } static void -sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, +sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); assert(shard->bytes_cur <= sec->bytes_max); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); /* - * Prepending here results in FIFO allocation per bin, which seems + * Prepending here results in LIFO allocation per bin, which seems * reasonable. */ - edata_list_active_prepend(&shard->freelist[pszind], edata); + sec_bin_t *bin = &shard->bins[pszind]; + edata_list_active_prepend(&bin->freelist, edata); + bin->bytes_cur += size; shard->bytes_cur += size; if (shard->bytes_cur > sec->bytes_max) { /* @@ -170,7 +227,10 @@ sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, * in the backing allocator). This has the extra advantage of * not requiring advanced cache balancing strategies. 
*/ - sec_do_flush_locked(tsdn, sec, shard); + sec_flush_some_and_unlock(tsdn, sec, shard); + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -184,8 +244,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { - sec_shard_dalloc_locked(tsdn, sec, shard, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); + sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); pai_dalloc(tsdn, sec->fallback, edata); @@ -196,7 +255,7 @@ void sec_flush(tsdn_t *tsdn, sec_t *sec) { for (size_t i = 0; i < sec->nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } @@ -206,7 +265,7 @@ sec_disable(tsdn_t *tsdn, sec_t *sec) { for (size_t i = 0; i < sec->nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec->shards[i].enabled = false; - sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } diff --git a/test/unit/sec.c b/test/unit/sec.c index 7657537..5fe3550 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -200,8 +200,11 @@ TEST_BEGIN(test_auto_flush) { expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* - * Free the extra allocation; this should trigger a flush of all - * extents in the cache. + * Free the extra allocation; this should trigger a flush. The internal + * flushing logic is allowed to get complicated; for now, we rely on our + * whitebox knowledge of the fact that the SEC flushes bins in their + * entirety when it decides to do so, and it has only one bin active + * right now. */ pai_dalloc(tsdn, &sec.pai, extra_alloc); expect_zu_eq(NALLOCS + 1, ta.alloc_count, -- cgit v0.12 From 480f3b11cd61c1cf37c90d61701829a0cebc98da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 7 Jan 2021 12:27:43 -0800 Subject: Add a batch allocation interface to the PAI. For now, no real allocator actually implements this interface; this will change in subsequent diffs. --- include/jemalloc/internal/pai.h | 19 ++++++++++++++++++- src/hpa.c | 1 + src/pac.c | 1 + src/pai.c | 13 +++++++++++++ src/sec.c | 1 + test/unit/sec.c | 25 +++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 73f5433..16e022d 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -8,6 +8,14 @@ struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); + /* + * Returns the number of extents added to the list (which may be fewer + * than requested, in case of OOM). The list should already be + * initialized. The only alignment guarantee is page-alignment, and + * the results are not necessarily zeroed. 
+ */ + size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero); bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -28,6 +36,12 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { return self->alloc(tsdn, self, size, alignment, zero); } +static inline size_t +pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results) { + return self->alloc_batch(tsdn, self, size, nallocs, results); +} + static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { @@ -51,9 +65,12 @@ pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { } /* - * An implementation of batch deallocation that simply calls dalloc once for + * An implementation of batch allocation that simply calls alloc once for * each item in the list. */ +size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); +/* Ditto, for dalloc. */ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); diff --git a/src/hpa.c b/src/hpa.c index fa58bb7..338d575 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -90,6 +90,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; + shard->pai.alloc_batch = &pai_alloc_batch_default; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; diff --git a/src/pac.c b/src/pac.c index 0ba0f2f..93427ca 100644 --- a/src/pac.c +++ b/src/pac.c @@ -91,6 +91,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); pac->pai.alloc = &pac_alloc_impl; + pac->pai.alloc_batch = &pai_alloc_batch_default; pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; diff --git a/src/pai.c b/src/pai.c index 1035c85..bd6966c 100644 --- a/src/pai.c +++ b/src/pai.c @@ -1,6 +1,19 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +size_t +pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results) { + for (size_t i = 0; i < nallocs; i++) { + edata_t *edata = pai_alloc(tsdn, self, size, PAGE, + /* zero */ false); + if (edata == NULL) { + return i; + } + edata_list_active_append(results, edata); + } + return nallocs; +} void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, diff --git a/src/sec.c b/src/sec.c index 49b4104..af7c291 100644 --- a/src/sec.c +++ b/src/sec.c @@ -52,6 +52,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, * initialization failed will segfault in an easy-to-spot way. 
*/ sec->pai.alloc = &sec_alloc; + sec->pai.alloc_batch = &pai_alloc_batch_default; sec->pai.expand = &sec_expand; sec->pai.shrink = &sec_shrink; sec->pai.dalloc = &sec_dalloc; diff --git a/test/unit/sec.c b/test/unit/sec.c index 5fe3550..69132c1 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -7,6 +7,7 @@ struct pai_test_allocator_s { pai_t pai; bool alloc_fail; size_t alloc_count; + size_t alloc_batch_count; size_t dalloc_count; size_t dalloc_batch_count; /* @@ -42,6 +43,28 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, return edata; } +static inline size_t +pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + if (ta->alloc_fail) { + return 0; + } + for (size_t i = 0; i < nallocs; i++) { + edata_t *edata = malloc(sizeof(edata_t)); + assert_ptr_not_null(edata, ""); + edata_init(edata, /* arena_ind */ 0, + (void *)ta->next_ptr, size, + /* slab */ false, /* szind */ 0, /* sn */ 1, + extent_state_active, /* zero */ false, /* comitted */ true, + /* ranged */ false, EXTENT_NOT_HEAD); + ta->next_ptr += size; + ta->alloc_batch_count++; + edata_list_active_append(results, edata); + } + return nallocs; +} + static bool pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { @@ -82,6 +105,7 @@ static inline void pai_test_allocator_init(pai_test_allocator_t *ta) { ta->alloc_fail = false; ta->alloc_count = 0; + ta->alloc_batch_count = 0; ta->dalloc_count = 0; ta->dalloc_batch_count = 0; /* Just don't start the edata at 0. */ @@ -91,6 +115,7 @@ pai_test_allocator_init(pai_test_allocator_t *ta) { ta->shrink_count = 0; ta->shrink_return_value = false; ta->pai.alloc = &pai_test_allocator_alloc; + ta->pai.alloc_batch = &pai_test_allocator_alloc_batch; ta->pai.expand = &pai_test_allocator_expand; ta->pai.shrink = &pai_test_allocator_shrink; ta->pai.dalloc = &pai_test_allocator_dalloc; -- cgit v0.12 From cdae6706a6dbe6ab75688ea24a82ef4165c3b0b1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Jan 2021 13:06:43 -0800 Subject: SEC: Use batch fills. Currently, this doesn't help much, since no PAI implementation supports flushing. This will change in subsequent commits. --- include/jemalloc/internal/sec.h | 28 ++++++++ src/sec.c | 147 ++++++++++++++++++++++++++++------------ test/unit/sec.c | 63 +++++++++-------- 3 files changed, 167 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 815b4bb..fadf4b6 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -46,6 +46,24 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { typedef struct sec_bin_s sec_bin_t; struct sec_bin_s { /* + * When we fail to fulfill an allocation, we do a batch-alloc on the + * underlying allocator to fill extra items, as well. We drop the SEC + * lock while doing so, to allow operations on other bins to succeed. + * That introduces the possibility of other threads also trying to + * allocate out of this bin, failing, and also going to the backing + * allocator. To avoid a thundering herd problem in which lots of + * threads do batch allocs and overfill this bin as a result, we only + * allow one batch allocation at a time for a bin. This bool tracks + * whether or not some thread is already batch allocating. + * + * Eventually, the right answer may be a smarter sharding policy for the + * bins (e.g. 
a mutex per bin, which would also be more scalable + * generally; the batch-allocating thread could hold it while + * batch-allocating). + */ + bool being_batch_filled; + + /* * Number of bytes in this particular bin (as opposed to the * sec_shard_t's bytes_cur. This isn't user visible or reported in * stats; rather, it allows us to quickly determine the change in the @@ -109,6 +127,16 @@ struct sec_s { size_t bytes_after_flush; /* + * When we can't satisfy an allocation out of the SEC because there are + * no available ones cached, we allocate multiple of that size out of + * the fallback allocator. Eventually we might want to do something + * cleverer, but for now we just grab a fixed number. + * + * For now, just the constant 4. Eventually, it should be configurable. + */ + size_t batch_fill_extra; + + /* * We don't necessarily always use all the shards; requests are * distributed across shards [0, nshards - 1). */ diff --git a/src/sec.c b/src/sec.c index af7c291..f177bbe 100644 --- a/src/sec.c +++ b/src/sec.c @@ -13,6 +13,7 @@ static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void sec_bin_init(sec_bin_t *bin) { + bin->being_batch_filled = false; bin->bytes_cur = 0; edata_list_active_init(&bin->freelist); } @@ -45,6 +46,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, sec->bytes_max = bytes_max; sec->bytes_after_flush = bytes_max / 2; + sec->batch_fill_extra = 4; sec->nshards = nshards; /* @@ -88,14 +90,52 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { return &sec->shards[*idxp]; } +/* + * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an + * empty cache, we'll try to fill it, which can push the shard over it's limit. + */ +static void +sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + while (shard->bytes_cur > sec->bytes_after_flush) { + /* Pick a victim. */ + sec_bin_t *bin = &shard->bins[shard->to_flush_next]; + + /* Update our victim-picking state. */ + shard->to_flush_next++; + if (shard->to_flush_next == SEC_NPSIZES) { + shard->to_flush_next = 0; + } + + assert(shard->bytes_cur >= bin->bytes_cur); + if (bin->bytes_cur != 0) { + shard->bytes_cur -= bin->bytes_cur; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + /* + * Either bin->bytes_cur was 0, in which case we didn't touch + * the bin list but it should be empty anyways (or else we + * missed a bytes_cur update on a list modification), or it + * *was* 0 and we emptied it ourselves. Either way, it should + * be empty now. 
+ */ + assert(edata_list_active_empty(&bin->freelist)); + } + + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); +} + static edata_t * sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - pszind_t pszind) { + sec_bin_t *bin) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!shard->enabled) { return NULL; } - sec_bin_t *bin = &shard->bins[pszind]; edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { edata_list_active_remove(&bin->freelist, edata); @@ -108,6 +148,50 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static edata_t * +sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + sec_bin_t *bin, size_t size) { + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + + edata_list_active_t result; + edata_list_active_init(&result); + size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, + 1 + sec->batch_fill_extra, &result); + + edata_t *ret = edata_list_active_first(&result); + if (ret != NULL) { + edata_list_active_remove(&result, ret); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + bin->being_batch_filled = false; + /* + * Handle the easy case first: nothing to cache. Note that this can + * only happen in case of OOM, since sec_alloc checks the expected + * number of allocs, and doesn't bother going down the batch_fill + * pathway if there won't be anything left to cache. So to be in this + * code path, we must have asked for > 1 alloc, but only gotten 1 back. + */ + if (nalloc <= 1) { + malloc_mutex_unlock(tsdn, &shard->mtx); + return ret; + } + + size_t new_cached_bytes = (nalloc - 1) * size; + + edata_list_active_concat(&bin->freelist, &result); + bin->bytes_cur += new_cached_bytes; + shard->bytes_cur += new_cached_bytes; + + if (shard->bytes_cur > sec->bytes_max) { + sec_flush_some_and_unlock(tsdn, sec, shard); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + } + + return ret; +} + +static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); @@ -119,16 +203,26 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin = &shard->bins[pszind]; + bool do_batch_fill = false; + malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); + if (edata == NULL) { + if (!bin->being_batch_filled && sec->batch_fill_extra > 0) { + bin->being_batch_filled = true; + do_batch_fill = true; + } + } malloc_mutex_unlock(tsdn, &shard->mtx); if (edata == NULL) { - /* - * See the note in dalloc, below; really, we should add a - * batch_alloc method to the PAI and get more than one extent at - * a time. - */ - edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero); + if (do_batch_fill) { + edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin, + size); + } else { + edata = pai_alloc(tsdn, sec->fallback, size, alignment, + zero); + } } return edata; } @@ -169,41 +263,6 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { } static void -sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->bytes_after_flush) { - /* Pick a victim. 
*/ - sec_bin_t *bin = &shard->bins[shard->to_flush_next]; - - /* Update our victim-picking state. */ - shard->to_flush_next++; - if (shard->to_flush_next == SEC_NPSIZES) { - shard->to_flush_next = 0; - } - - assert(shard->bytes_cur >= bin->bytes_cur); - if (bin->bytes_cur != 0) { - shard->bytes_cur -= bin->bytes_cur; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } - /* - * Either bin->bytes_cur was 0, in which case we didn't touch - * the bin list but it should be empty anyways (or else we - * missed a bytes_cur update on a list modification), or it - * *was* 0 and we emptied it ourselves. Either way, it should - * be empty now. - */ - assert(edata_list_active_empty(&bin->freelist)); - } - - malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); -} - -static void sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); diff --git a/test/unit/sec.c b/test/unit/sec.c index 69132c1..ff39453 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -134,14 +134,17 @@ TEST_BEGIN(test_reuse) { */ tsdn_t *tsdn = TSDN_NULL; /* - * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. + * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 33 pages in the cache before triggering a flush. We + * set the flush liimt to twice this amount, to avoid accidentally + * triggering a flush caused by the batch-allocation down the cache fill + * pathway disrupting ordering. */ - enum { NALLOCS = 10 }; + enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, - /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE); + /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -150,7 +153,9 @@ TEST_BEGIN(test_reuse) { /* zero */ false); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + expect_zu_le(2 * NALLOCS, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -164,7 +169,7 @@ TEST_BEGIN(test_reuse) { for (int i = NALLOCS - 1; i >= 0; i--) { pai_dalloc(tsdn, &sec.pai, two_page[i]); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -182,7 +187,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -198,7 +203,12 @@ TEST_BEGIN(test_auto_flush) { tsdn_t *tsdn = TSDN_NULL; /* * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. + * able to get to 30 pages in the cache before triggering a flush. 
The + * choice of NALLOCS here is chosen to match the batch allocation + * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly + * empty, even in the presence of batch allocation on fill). + * Eventually, once our allocation batching strategies become smarter, + * this should change. */ enum { NALLOCS = 10 }; edata_t *extra_alloc; @@ -212,7 +222,8 @@ TEST_BEGIN(test_auto_flush) { } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -220,7 +231,7 @@ TEST_BEGIN(test_auto_flush) { for (int i = 0; i < NALLOCS; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); } - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -232,7 +243,7 @@ TEST_BEGIN(test_auto_flush) { * right now. */ pai_dalloc(tsdn, &sec.pai, extra_alloc); - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); @@ -253,7 +264,7 @@ do_disable_flush_test(bool is_disable) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - enum { NALLOCS = 10 }; + enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, /* bytes_max */ NALLOCS * PAGE); @@ -266,8 +277,9 @@ do_disable_flush_test(bool is_disable) { for (int i = 0; i < NALLOCS - 1; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); } - expect_zu_eq(NALLOCS, ta.alloc_count, - "Incorrect number of allocations"); + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + + expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -277,12 +289,13 @@ do_disable_flush_test(bool is_disable) { sec_flush(tsdn, &sec); } - expect_zu_eq(NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count, "Incorrect number of batch deallocations"); + size_t old_dalloc_batch_count = ta.dalloc_batch_count; /* * If we free into a disabled SEC, it should forward to the fallback. @@ -290,11 +303,11 @@ do_disable_flush_test(bool is_disable) { */ pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); - expect_zu_eq(NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(is_disable ? 
1 : 0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count, "Incorrect number of batch deallocations"); } @@ -404,7 +417,7 @@ expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { */ stats.bytes = 123; sec_stats_merge(tsdn, sec, &stats); - assert_zu_eq(npages * PAGE + 123, stats.bytes, ""); + assert_zu_le(npages * PAGE + 123, stats.bytes, ""); } TEST_BEGIN(test_stats_simple) { @@ -417,7 +430,7 @@ TEST_BEGIN(test_stats_simple) { enum { NITERS = 100, - FLUSH_PAGES = 10, + FLUSH_PAGES = 20, }; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, @@ -470,26 +483,22 @@ TEST_BEGIN(test_stats_auto_flush) { for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); - expect_stats_pages(tsdn, &sec, 0); } for (size_t i = 0; i < FLUSH_PAGES; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); - expect_stats_pages(tsdn, &sec, i + 1); } pai_dalloc(tsdn, &sec.pai, extra_alloc0); - /* The last dalloc should have triggered a flush. */ - expect_stats_pages(tsdn, &sec, 0); /* Flush the remaining pages; stats should still work. */ for (size_t i = 0; i < FLUSH_PAGES; i++) { pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); - expect_stats_pages(tsdn, &sec, i + 1); } pai_dalloc(tsdn, &sec.pai, extra_alloc1); - /* The last dalloc should have triggered a flush, again. */ - expect_stats_pages(tsdn, &sec, 0); + + expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count + - ta.dalloc_count - ta.dalloc_batch_count); } TEST_END -- cgit v0.12 From ce9386370ad67d4b12dc167600080fe17fcf3113 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 20 Jan 2021 14:55:42 -0800 Subject: HPA: Implement batch allocation. 
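The allocation side mirrors the deallocation batching added earlier in this series: take the shard mutex once, carve off as many results as possible, and return the number of successes so the caller can grow the backing store and retry for the remainder. The following is a rough, self-contained sketch of that shape (illustrative names only, not the real hpa_try_alloc_batch_no_grow signature from the diff below).

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for an HPA shard with a fixed page budget. */
typedef struct shard_s {
	pthread_mutex_t mtx;
	size_t pages_free;	/* protected by mtx */
} shard_t;

/*
 * Try one allocation; caller must hold shard->mtx. Returns 0 when the shard
 * would need to grow to satisfy the request.
 */
static int
try_alloc_one_locked(shard_t *shard, size_t npages) {
	if (shard->pages_free < npages) {
		return 0;
	}
	shard->pages_free -= npages;
	return 1;
}

/*
 * Satisfy up to nallocs requests under a single lock/unlock pair and report
 * how many succeeded; a partial result tells the caller to grow and retry,
 * much like the grow path in the patch below.
 */
static size_t
alloc_batch(shard_t *shard, size_t npages, size_t nallocs) {
	size_t nsuccess = 0;
	pthread_mutex_lock(&shard->mtx);
	for (; nsuccess < nallocs; nsuccess++) {
		if (!try_alloc_one_locked(shard, npages)) {
			break;
		}
	}
	pthread_mutex_unlock(&shard->mtx);
	return nsuccess;
}

int
main(void) {
	shard_t shard = {PTHREAD_MUTEX_INITIALIZER, 8};
	/* Ask for 3 four-page allocations; only 2 fit, so the caller would grow. */
	size_t n = alloc_batch(&shard, 4, 3);
	printf("filled %zu of 3 requests before needing to grow\n", n);
	return 0;
}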
--- src/hpa.c | 183 ++++++++++++++++++++++++++++---------------------------- test/unit/hpa.c | 74 ++++++++++++++++++++++- 2 files changed, 164 insertions(+), 93 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 338d575..0e9b152 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -10,6 +10,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); +static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -425,13 +427,11 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { } static edata_t * -hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { +hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom) { bool err; - malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); - *oom = false; if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } @@ -440,7 +440,6 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } @@ -487,42 +486,61 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) */ psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); + return edata; +} + +static size_t +hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom, size_t nallocs, edata_list_active_t *results) { + malloc_mutex_lock(tsdn, &shard->mtx); + size_t nsuccess = 0; + for (; nsuccess < nallocs; nsuccess++) { + edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size, + oom); + if (edata == NULL) { + break; + } + edata_list_active_append(results, edata); + } hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); - - return edata; + return nsuccess; } -static edata_t * -hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { +static size_t +hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + size_t nallocs, edata_list_active_t *results) { assert(size <= shard->opts.slab_max_alloc); - bool err; - bool oom; - edata_t *edata; + bool oom = false; + + size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs, results); - edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); - if (edata != NULL) { - return edata; + if (nsuccess == nallocs || oom) { + return nsuccess; } - /* Nothing in the psset works; we have to grow it. */ + /* + * We didn't OOM, but weren't able to fill everything requested of us; + * try to grow. + */ malloc_mutex_lock(tsdn, &shard->grow_mtx); /* * Check for grow races; maybe some earlier thread expanded the psset * in between when we dropped the main mutex and grabbed the grow mutex. 
*/ - edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); - if (edata != NULL || oom) { + nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs - nsuccess, results); + if (nsuccess == nallocs || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return edata; + return nsuccess; } /* @@ -533,78 +551,28 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpdata_t *ps = hpa_grow(tsdn, shard); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; - } - - /* We got the pageslab; allocate from it. */ - malloc_mutex_lock(tsdn, &shard->mtx); - - psset_insert(&shard->psset, ps); - - edata = edata_cache_small_get(tsdn, &shard->ecs); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; + return nsuccess; } /* - * TODO: the tail of this function is quite similar to the tail of - * hpa_try_alloc_no_grow (both, broadly, do the metadata management of - * initializing an edata_t from an hpdata_t once both have been - * allocated). The only differences are in error case handling and lock - * management (we hold grow_mtx, but should drop it before doing any - * deferred work). With a little refactoring, we could unify the paths. + * We got the pageslab; allocate from it. This does an unlock followed + * by a lock on the same mutex, and holds the grow mutex while doing + * deferred work, but this is an uncommon path; the simplicity is worth + * it. */ - psset_update_begin(&shard->psset, ps); - - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, - /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); - edata_ps_set(edata, ps); - - err = emap_register_boundary(tsdn, shard->emap, edata, - SC_NSIZES, /* slab */ false); - if (err) { - hpdata_unreserve(ps, edata_addr_get(edata), - edata_size_get(edata)); - - edata_cache_small_put(tsdn, &shard->ecs, edata); - - /* We'll do a fake purge; the pages weren't really touched. */ - hpdata_purge_state_t purge_state; - void *purge_addr; - size_t purge_size; - hpdata_purge_begin(ps, &purge_state); - bool found_extent = hpdata_purge_next(ps, &purge_state, - &purge_addr, &purge_size); - assert(found_extent); - assert(purge_addr == addr); - assert(purge_size == size); - found_extent = hpdata_purge_next(ps, &purge_state, - &purge_addr, &purge_size); - assert(!found_extent); - hpdata_purge_end(ps, &purge_state); - - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; - } - hpa_update_purge_hugify_eligibility(shard, ps); - psset_update_end(&shard->psset, ps); + malloc_mutex_lock(tsdn, &shard->mtx); + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs - nsuccess, results); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. 
*/ malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_do_deferred_work(tsdn, shard); - - malloc_mutex_unlock(tsdn, &shard->mtx); - return edata; + return nsuccess; } static hpa_shard_t * @@ -616,28 +584,27 @@ hpa_from_pai(pai_t *self) { return (hpa_shard_t *)self; } -static edata_t * -hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero) { +static size_t +hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results) { + assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - hpa_shard_t *shard = hpa_from_pai(self); - /* We don't handle alignment or zeroing for now. */ - if (alignment > PAGE || zero) { - return NULL; - } + if (size > shard->opts.slab_max_alloc) { - return NULL; + return 0; } - edata_t *edata = hpa_alloc_psset(tsdn, shard, size); + size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, + results); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (edata != NULL) { + edata_t *edata; + ql_foreach(edata, &results->head, ql_link_active) { emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); @@ -648,6 +615,29 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, assert(edata_base_get(edata) == edata_addr_get(edata)); assert(edata_base_get(edata) != NULL); } + return nsuccess; +} + +static edata_t * +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + assert((size & PAGE_MASK) == 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* We don't handle alignment or zeroing for now. */ + if (alignment > PAGE || zero) { + return NULL; + } + /* + * An alloc with alignment == PAGE and zero == false is equivalent to a + * batch alloc of 1. Just do that, so we can share code. + */ + edata_list_active_t results; + edata_list_active_init(&results); + size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, + &results); + assert(nallocs == 0 || nallocs == 1); + edata_t *edata = edata_list_active_first(&results); return edata; } @@ -677,6 +667,15 @@ hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); + /* + * Another thread shouldn't be trying to touch the metadata of an + * allocation being freed. The one exception is a merge attempt from a + * lower-addressed PAC extent; in this case we have a nominal race on + * the edata metadata bits, but in practice the fact that the PAI bits + * are different will prevent any further access. The race is bad, but + * benign in practice, and the long term plan is to track enough state + * in the rtree to prevent these merge attempts in the first place. 
+ */ edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); emap_deregister_boundary(tsdn, shard->emap, edata); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 924795f..4600983 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -211,6 +211,77 @@ TEST_BEGIN(test_stress) { } TEST_END +static void +expect_contiguous(edata_t **edatas, size_t nedatas) { + for (size_t i = 0; i < nedatas; i++) { + size_t expected = (size_t)edata_base_get(edatas[0]) + + i * PAGE; + expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), + "Mismatch at index %zu", i); + } +} + +TEST_BEGIN(test_alloc_dalloc_batch) { + test_skip_if(!hpa_supported()); + + hpa_shard_t *shard = create_test_data(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8}; + + edata_t *allocs[NALLOCS]; + /* + * Allocate a mix of ways; first half from regular alloc, second half + * from alloc_batch. + */ + for (size_t i = 0; i < NALLOCS / 2; i++) { + allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + edata_list_active_t allocs_list; + edata_list_active_init(&allocs_list); + size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, + &allocs_list); + expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); + for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { + allocs[i] = edata_list_active_first(&allocs_list); + edata_list_active_remove(&allocs_list, allocs[i]); + } + + /* + * Should have allocated them contiguously, despite the differing + * methods used. + */ + void *orig_base = edata_base_get(allocs[0]); + expect_contiguous(allocs, NALLOCS); + + /* + * Batch dalloc the first half, individually deallocate the second half. + */ + for (size_t i = 0; i < NALLOCS / 2; i++) { + edata_list_active_append(&allocs_list, allocs[i]); + } + pai_dalloc_batch(tsdn, &shard->pai, &allocs_list); + for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { + pai_dalloc(tsdn, &shard->pai, allocs[i]); + } + + /* Reallocate (individually), and ensure reuse and contiguity. */ + for (size_t i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); + } + void *new_base = edata_base_get(allocs[0]); + expect_ptr_eq(orig_base, new_base, + "Failed to reuse the allocated memory."); + expect_contiguous(allocs, NALLOCS); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -227,5 +298,6 @@ main(void) { (void)mem_tree_destroy; return test_no_reentrancy( test_alloc_max, - test_stress); + test_stress, + test_alloc_dalloc_batch); } -- cgit v0.12 From fb327368db39a2edca5f9659a70a53bd3bb0ed6c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 26 Jan 2021 18:35:18 -0800 Subject: SEC: Expand option configurability. This change pulls the SEC options into a struct, which simplifies their handling across various modules (e.g. PA needs to forward on SEC options from the malloc_conf string, but it doesn't really need to know their names). While we're here, make some of the fixed constants configurable, and unify naming from the configuration options to the internals. 
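Concretely, callers now start from SEC_OPTS_DEFAULT, override the fields they care about, and hand the struct to sec_init(). A minimal sketch of that flow follows; it assumes the internal sec headers and an initialized fallback pai_t, and the particular values are illustrative rather than recommended defaults:

static bool
sec_setup_example(sec_t *sec, pai_t *fallback) {
        sec_opts_t opts = SEC_OPTS_DEFAULT;
        opts.nshards = 1;
        opts.max_alloc = 2 * PAGE;      /* refuse to cache anything larger */
        opts.max_bytes = 128 * PAGE;    /* per-shard cache cap */
        opts.bytes_after_flush = opts.max_bytes / 2;
        opts.batch_fill_extra = 4;      /* fill 1 + 4 extents on a cache miss */
        /* sec_init() clips nshards and max_alloc to their supported maxima. */
        return sec_init(sec, fallback, &opts);
}

The same knobs surface externally as the hpa_sec_nshards, hpa_sec_max_alloc, hpa_sec_max_bytes, hpa_sec_bytes_after_flush, and hpa_sec_batch_fill_extra options (see the ctl.c, jemalloc.c, and stats.c hunks below).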
--- .../jemalloc/internal/jemalloc_internal_externs.h | 5 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/sec.h | 42 +-------------- include/jemalloc/internal/sec_opts.h | 59 ++++++++++++++++++++ src/arena.c | 5 +- src/ctl.c | 27 +++++++--- src/jemalloc.c | 30 ++++++----- src/pa.c | 5 +- src/sec.c | 63 +++++++++++----------- src/stats.c | 4 +- test/unit/mallctl.c | 4 +- test/unit/sec.c | 61 +++++++++++++-------- 12 files changed, 185 insertions(+), 122 deletions(-) create mode 100644 include/jemalloc/internal/sec_opts.h diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index da69355..de5731f 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/sec_opts.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/nstime.h" @@ -16,9 +17,7 @@ extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; -extern size_t opt_hpa_sec_max_alloc; -extern size_t opt_hpa_sec_max_bytes; -extern size_t opt_hpa_sec_nshards; +extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; extern bool opt_junk_alloc; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 6ded54f..acb94eb 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); + const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index fadf4b6..ddcdfbd 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -103,49 +103,11 @@ struct sec_s { pai_t pai; pai_t *fallback; - /* - * We'll automatically refuse to cache any objects in this sec if - * they're larger than alloc_max bytes. - */ - size_t alloc_max; - /* - * Exceeding this amount of cached extents in a shard causes *all* of - * the bins in that shard to be flushed. - */ - size_t bytes_max; - /* - * The number of bytes (in all bins) we flush down to when we exceed - * bytes_cur. We want this to be less than bytes_cur, because - * otherwise we could get into situations where a shard undergoing - * net-deallocation keeps bytes_cur very near to bytes_max, so that - * most deallocations get immediately forwarded to the underlying PAI - * implementation, defeating the point of the SEC. - * - * Currently this is just set to bytes_max / 2, but eventually can be - * configurable. - */ - size_t bytes_after_flush; - - /* - * When we can't satisfy an allocation out of the SEC because there are - * no available ones cached, we allocate multiple of that size out of - * the fallback allocator. Eventually we might want to do something - * cleverer, but for now we just grab a fixed number. - * - * For now, just the constant 4. Eventually, it should be configurable. 
- */ - size_t batch_fill_extra; - - /* - * We don't necessarily always use all the shards; requests are - * distributed across shards [0, nshards - 1). - */ - size_t nshards; + sec_opts_t opts; sec_shard_t shards[SEC_NSHARDS_MAX]; }; -bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, - size_t bytes_max); +bool sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts); void sec_flush(tsdn_t *tsdn, sec_t *sec); void sec_disable(tsdn_t *tsdn, sec_t *sec); diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h new file mode 100644 index 0000000..91b6d0d --- /dev/null +++ b/include/jemalloc/internal/sec_opts.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_SEC_OPTS_H +#define JEMALLOC_INTERNAL_SEC_OPTS_H + +/* + * The configuration settings used by an sec_t. Morally, this is part of the + * SEC interface, but we put it here for header-ordering reasons. + */ + +typedef struct sec_opts_s sec_opts_t; +struct sec_opts_s { + /* + * We don't necessarily always use all the shards; requests are + * distributed across shards [0, nshards - 1). + */ + size_t nshards; + /* + * We'll automatically refuse to cache any objects in this sec if + * they're larger than max_alloc bytes, instead forwarding such objects + * directly to the fallback. + */ + size_t max_alloc; + /* + * Exceeding this amount of cached extents in a shard causes us to start + * flushing bins in that shard until we fall below bytes_after_flush. + */ + size_t max_bytes; + /* + * The number of bytes (in all bins) we flush down to when we exceed + * bytes_cur. We want this to be less than bytes_cur, because + * otherwise we could get into situations where a shard undergoing + * net-deallocation keeps bytes_cur very near to max_bytes, so that + * most deallocations get immediately forwarded to the underlying PAI + * implementation, defeating the point of the SEC. + */ + size_t bytes_after_flush; + /* + * When we can't satisfy an allocation out of the SEC because there are + * no available ones cached, we allocate multiple of that size out of + * the fallback allocator. Eventually we might want to do something + * cleverer, but for now we just grab a fixed number. + */ + size_t batch_fill_extra; +}; + +#define SEC_OPTS_DEFAULT { \ + /* nshards */ \ + 4, \ + /* max_alloc */ \ + 32 * 1024, \ + /* max_bytes */ \ + 256 * 1024, \ + /* bytes_after_flush */ \ + 128 * 1024, \ + /* batch_fill_extra */ \ + 0 \ +} + + +#endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/src/arena.c b/src/arena.c index da0f1f0..f054f09 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1479,9 +1479,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, - &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, - opt_hpa_sec_max_bytes)) { + if (pa_shard_enable_hpa(&arena->pa_shard, &opt_hpa_opts, + &opt_hpa_sec_opts)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 4fc3ad0..663cf86 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -98,9 +98,11 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_dehugification_threshold) CTL_PROTO(opt_hpa_dirty_mult) +CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) -CTL_PROTO(opt_hpa_sec_nshards) +CTL_PROTO(opt_hpa_sec_bytes_after_flush) +CTL_PROTO(opt_hpa_sec_batch_fill_extra) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -406,9 +408,13 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_dehugification_threshold"), CTL(opt_hpa_dehugification_threshold)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, + {NAME("hpa_sec_bytes_after_flush"), + CTL(opt_hpa_sec_bytes_after_flush)}, + {NAME("hpa_sec_batch_fill_extra"), + CTL(opt_hpa_sec_batch_fill_extra)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -2100,8 +2106,9 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) + +/* HPA options. */ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, @@ -2111,9 +2118,17 @@ CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, * its representation are internal implementation details. */ CTL_RO_NL_GEN(opt_hpa_dirty_mult, opt_hpa_opts.dirty_mult, fxp_t) -CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) + +/* HPA SEC options */ +CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, + size_t) +CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, + size_t) + CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 125682b..613733f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -153,11 +153,7 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; - -size_t opt_hpa_sec_max_alloc = 32 * 1024; -/* These settings correspond to a maximum of 1MB cached per arena. 
*/ -size_t opt_hpa_sec_max_bytes = 256 * 1024; -size_t opt_hpa_sec_nshards = 4; +sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; /* * Arenas that are used to service external requests. Not all elements of the @@ -1473,12 +1469,21 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", - 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, + "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, + "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, + "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush, + "hpa_sec_bytes_after_flush", PAGE, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, + "hpa_sec_batch_fill_extra", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { @@ -1777,8 +1782,7 @@ malloc_init_hard_a0_locked() { } } else if (opt_hpa) { if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, - opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, - opt_hpa_sec_max_bytes)) { + &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index abe3f00..dd61aaa 100644 --- a/src/pa.c +++ b/src/pa.c @@ -50,13 +50,12 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { + const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } - if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, - sec_alloc_max, sec_bytes_max)) { + if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; diff --git a/src/sec.c b/src/sec.c index f177bbe..c37cf35 100644 --- a/src/sec.c +++ b/src/sec.c @@ -19,12 +19,12 @@ sec_bin_init(sec_bin_t *bin) { } bool -sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, - size_t bytes_max) { - if (nshards > SEC_NSHARDS_MAX) { - nshards = SEC_NSHARDS_MAX; +sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts) { + size_t nshards_clipped = opts->nshards; + if (nshards_clipped > SEC_NSHARDS_MAX) { + nshards_clipped = SEC_NSHARDS_MAX; } - for (size_t i = 0; i < nshards; i++) { + for (size_t i = 0; i < nshards_clipped; i++) { sec_shard_t *shard = &sec->shards[i]; bool err = malloc_mutex_init(&shard->mtx, "sec_shard", WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); @@ -39,15 +39,15 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, shard->to_flush_next = 0; } sec->fallback = fallback; - sec->alloc_max = alloc_max; - if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) { - sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1); + + size_t max_alloc_clipped = opts->max_alloc; + if (max_alloc_clipped > sz_pind2sz(SEC_NPSIZES - 1)) { + max_alloc_clipped = sz_pind2sz(SEC_NPSIZES - 1); } - 
sec->bytes_max = bytes_max; - sec->bytes_after_flush = bytes_max / 2; - sec->batch_fill_extra = 4; - sec->nshards = nshards; + sec->opts = *opts; + sec->opts.nshards = nshards_clipped; + sec->opts.max_alloc = max_alloc_clipped; /* * Initialize these last so that an improper use of an SEC whose @@ -83,8 +83,9 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * when we multiply by the number of shards. */ uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); - assert(idx < (uint32_t)sec->nshards); + uint32_t idx = + (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32); + assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } return &sec->shards[*idxp]; @@ -99,7 +100,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); edata_list_active_t to_flush; edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->bytes_after_flush) { + while (shard->bytes_cur > sec->opts.bytes_after_flush) { /* Pick a victim. */ sec_bin_t *bin = &shard->bins[shard->to_flush_next]; @@ -155,7 +156,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->batch_fill_extra, &result); + 1 + sec->opts.batch_fill_extra, &result); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -182,7 +183,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, bin->bytes_cur += new_cached_bytes; shard->bytes_cur += new_cached_bytes; - if (shard->bytes_cur > sec->bytes_max) { + if (shard->bytes_cur > sec->opts.max_bytes) { sec_flush_some_and_unlock(tsdn, sec, shard); } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -197,8 +198,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { sec_t *sec = (sec_t *)self; - if (zero || alignment > PAGE || sec->nshards == 0 - || size > sec->alloc_max) { + if (zero || alignment > PAGE || sec->opts.nshards == 0 + || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero); } pszind_t pszind = sz_psz2ind(size); @@ -209,7 +210,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); if (edata == NULL) { - if (!bin->being_batch_filled && sec->batch_fill_extra > 0) { + if (!bin->being_batch_filled + && sec->opts.batch_fill_extra > 0) { bin->being_batch_filled = true; do_batch_fill = true; } @@ -266,7 +268,7 @@ static void sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - assert(shard->bytes_cur <= sec->bytes_max); + assert(shard->bytes_cur <= sec->opts.max_bytes); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); /* @@ -277,7 +279,7 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_prepend(&bin->freelist, edata); bin->bytes_cur += size; shard->bytes_cur += size; - if (shard->bytes_cur > sec->bytes_max) { + if (shard->bytes_cur > sec->opts.max_bytes) { /* * We've exceeded the shard limit. 
We make two nods in the * direction of fragmentation avoidance: we flush everything in @@ -297,7 +299,8 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { sec_t *sec = (sec_t *)self; - if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) { + if (sec->opts.nshards == 0 + || edata_size_get(edata) > sec->opts.max_alloc) { pai_dalloc(tsdn, sec->fallback, edata); return; } @@ -313,7 +316,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void sec_flush(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); @@ -322,7 +325,7 @@ sec_flush(tsdn_t *tsdn, sec_t *sec) { void sec_disable(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec->shards[i].enabled = false; sec_flush_all_locked(tsdn, sec, &sec->shards[i]); @@ -333,7 +336,7 @@ sec_disable(tsdn_t *tsdn, sec_t *sec) { void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { size_t sum = 0; - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { /* * We could save these lock acquisitions by making bytes_cur * atomic, but stats collection is rare anyways and we expect @@ -349,7 +352,7 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); malloc_mutex_prof_accum(tsdn, mutex_prof_data, &sec->shards[i].mtx); @@ -359,21 +362,21 @@ sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, void sec_prefork2(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); } } void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); } } void sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); } } diff --git a/src/stats.c b/src/stats.c index 7a0526c..69cb2d3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1486,9 +1486,11 @@ stats_general_print(emitter_t *emitter) { "opt.hpa_dirty_mult", emitter_type_string, &bufp); } } + OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") - OPT_WRITE_SIZE_T("hpa_sec_nshards") + OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") + OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 1fb7466..e9e0feb 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -287,9 +287,11 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(size_t, 
hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); - TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/sec.c b/test/unit/sec.c index ff39453..36ae1a5 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -23,6 +23,24 @@ struct pai_test_allocator_s { bool shrink_return_value; }; +static void +test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, + size_t max_bytes) { + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = max_alloc; + opts.max_bytes = max_bytes; + /* + * Just choose reasonable defaults for these; most tests don't care so + * long as they're something reasonable. + */ + opts.bytes_after_flush = max_bytes / 2; + opts.batch_fill_extra = 4; + + bool err = sec_init(sec, fallback, &opts); + assert_false(err, "Unexpected initialization failure"); +} + static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { @@ -143,8 +161,8 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, - /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, + /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -213,8 +231,8 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ NALLOCS * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -266,8 +284,8 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ NALLOCS * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -321,18 +339,18 @@ TEST_BEGIN(test_flush) { } TEST_END -TEST_BEGIN(test_alloc_max_respected) { +TEST_BEGIN(test_max_alloc_respected) { pai_test_allocator_t ta; pai_test_allocator_init(&ta); sec_t sec; /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - size_t alloc_max = 2 * PAGE; + size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; - sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max, - /* bytes_max */ 1000 * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, + /* max_bytes */ 1000 * PAGE); for (size_t i = 0; i < 100; i++) { expect_zu_eq(i, ta.alloc_count, @@ -362,8 +380,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE, - /* bytes_max */ 1000 * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, + /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); expect_ptr_not_null(edata, "Unexpected alloc failure"); @@ -395,8 +413,9 @@ TEST_BEGIN(test_nshards_0) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE, - /* bytes_max */ 1000 * PAGE); + sec_opts_t opts = SEC_OPTS_DEFAULT; + opts.nshards = 0; + sec_init(&sec, &ta.pai, &opts); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -433,8 +452,8 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -470,8 +489,8 @@ TEST_BEGIN(test_stats_auto_flush) { FLUSH_PAGES = 10, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *extra_alloc0; edata_t *extra_alloc1; @@ -514,8 +533,8 @@ TEST_BEGIN(test_stats_manual_flush) { FLUSH_PAGES = 10, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -550,7 +569,7 @@ main(void) { test_auto_flush, test_disable, test_flush, - test_alloc_max_respected, + test_max_alloc_respected, test_expand_shrink_delegate, test_nshards_0, test_stats_simple, -- cgit v0.12 From d21d5b46b607542398440d77b5f5ba22116dad5a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 6 Feb 2021 09:29:01 -0800 Subject: Edata: Move sn into its own field. This lets the bins use a fragmentation avoidance policy that matches the HPA's (without affecting the PAC). --- include/jemalloc/internal/edata.h | 41 +++++++++++++-------------------------- src/base.c | 2 +- src/hpa.c | 5 +++-- test/unit/psset.c | 2 +- 4 files changed, 19 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 11358ea..c71209e 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -87,9 +87,8 @@ struct edata_s { * i: szind * f: nfree * s: bin_shard - * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa + * 00000000 ... 000000ss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -120,16 +119,6 @@ struct edata_s { * nfree: Number of free regions in slab. * * bin_shard: the shard of the bin from which this extent came. - * - * sn: Serial number (potentially non-unique). - * - * Serial numbers may wrap around if !opt_retain, but as long as - * comparison functions fall back on address comparison for equal - * serial numbers, stable (if imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of - * wrap-around, e.g. 
when splitting an extent and assigning the same - * serial number to both resulting adjacent extents. */ uint64_t e_bits; #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) @@ -174,9 +163,6 @@ struct edata_s { #define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) #define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) -#define EDATA_BITS_SN_SHIFT (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT) -#define EDATA_BITS_SN_MASK (UINT64_MAX << EDATA_BITS_SN_SHIFT) - /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -201,8 +187,11 @@ struct edata_s { * into pageslabs). This tracks it. */ hpdata_t *e_ps; - /* Extra field reserved for HPA. */ - void *e_reserved; + /* + * Serial number. These are not necessarily unique; splitting an extent + * results in two extents with the same serial number. + */ + uint64_t e_sn; union { /* @@ -274,10 +263,9 @@ edata_binshard_get(const edata_t *edata) { return binshard; } -static inline size_t +static inline uint64_t edata_sn_get(const edata_t *edata) { - return (size_t)((edata->e_bits & EDATA_BITS_SN_MASK) >> - EDATA_BITS_SN_SHIFT); + return edata->e_sn; } static inline extent_state_t @@ -488,9 +476,8 @@ edata_nfree_sub(edata_t *edata, uint64_t n) { } static inline void -edata_sn_set(edata_t *edata, size_t sn) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SN_MASK) | - ((uint64_t)sn << EDATA_BITS_SN_SHIFT); +edata_sn_set(edata_t *edata, uint64_t sn) { + edata->e_sn = sn; } static inline void @@ -566,7 +553,7 @@ edata_is_head_set(edata_t *edata, bool is_head) { */ static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool slab, szind_t szind, uint64_t sn, extent_state_t state, bool zeroed, bool committed, extent_pai_t pai, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -587,7 +574,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { +edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -607,8 +594,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { static inline int edata_sn_comp(const edata_t *a, const edata_t *b) { - size_t a_sn = edata_sn_get(a); - size_t b_sn = edata_sn_get(b); + uint64_t a_sn = edata_sn_get(a); + uint64_t b_sn = edata_sn_get(b); return (a_sn > b_sn) - (a_sn < b_sn); } diff --git a/src/base.c b/src/base.c index d3732ba..00440f4 100644 --- a/src/base.c +++ b/src/base.c @@ -448,7 +448,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, ret = base_extent_bump_alloc(base, edata, usize, alignment); if (esn != NULL) { - *esn = edata_sn_get(edata); + *esn = (size_t)edata_sn_get(edata); } label_return: malloc_mutex_unlock(tsdn, &base->mtx); diff --git a/src/hpa.c b/src/hpa.c index 0e9b152..d078f18 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -458,8 +458,9 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, - /* committed */ true, 
EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); edata_ps_set(edata, ps); /* diff --git a/test/unit/psset.c b/test/unit/psset.c index b93dfbf..fdc28d3 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -91,7 +91,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { expect_false(edata_slab_get(edata), ""); expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); - expect_zu_eq(0, edata_sn_get(edata), ""); + expect_u64_eq(0, edata_sn_get(edata), ""); expect_d_eq(edata_state_get(edata), extent_state_active, ""); expect_false(edata_zeroed_get(edata), ""); expect_true(edata_committed_get(edata), ""); -- cgit v0.12 From 271a676dcd2d5ff863e8f6996089680f56fa0656 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 6 Feb 2021 11:57:32 -0800 Subject: hpdata: early bailout for longest free range. A number of common special cases allow us to stop iterating through an hpdata's bitmap earlier rather than later. --- src/hpdata.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/hpdata.c b/src/hpdata.c index e11ba8d..0fc7b7d 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -74,6 +74,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * to serve the allocation. */ assert(found); + assert(len <= hpdata_longest_free_range_get(hpdata)); if (len >= npages) { /* * We use first-fit within the page slabs; this gives @@ -103,25 +104,30 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->h_ntouched += new_dirty; /* - * We might have shrunk the longest free range. We have to keep - * scanning until the end of the hpdata to be sure. - * - * TODO: As an optimization, we should only do this when the range we - * just allocated from was equal to the longest free range size. + * If we allocated out of a range that was the longest in the hpdata, it + * might be the only one of that size and we'll have to adjust the + * metadata. */ - start = begin + npages; - while (start < HUGEPAGE_PAGES) { - bool found = fb_urange_iter(hpdata->active_pages, - HUGEPAGE_PAGES, start, &begin, &len); - if (!found) { - break; + if (len == hpdata_longest_free_range_get(hpdata)) { + start = begin + npages; + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + if (!found) { + break; + } + assert(len <= hpdata_longest_free_range_get(hpdata)); + if (len == hpdata_longest_free_range_get(hpdata)) { + largest_unchosen_range = len; + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; + hpdata_longest_free_range_set(hpdata, largest_unchosen_range); } - hpdata_longest_free_range_set(hpdata, largest_unchosen_range); hpdata_assert_consistent(hpdata); return (void *)( -- cgit v0.12 From 154aa5fcc102172fcac0e111ff79df9d5ced7973 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 11:04:46 -0800 Subject: Use the flat bitmap for eset and psset bitmaps. This is simpler (note that the eset field comment was actually incorrect!), and slightly faster. 
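Both containers now follow the same flat-bitmap idiom: one bit per size-class heap, set while that heap is non-empty, cleared when it drains, with fb_ffs() doing the first-fit scan. The sketch below shows the pattern in isolation; it assumes the internal flat_bitmap.h, and NCLASSES is a hypothetical stand-in for ESET_NPSIZES / PSSET_NPSIZES:

enum { NCLASSES = 64 }; /* stand-in for the real size-class count */
static fb_group_t nonempty[FB_NGROUPS(NCLASSES)];

static void
classes_init(void) {
        /* Starts all-clear, i.e. every heap is initially empty. */
        fb_init(nonempty, NCLASSES);
}

static void
class_became_nonempty(size_t ind) {
        fb_set(nonempty, NCLASSES, ind);
}

static void
class_became_empty(size_t ind) {
        fb_unset(nonempty, NCLASSES, ind);
}

static size_t
first_nonempty_at_least(size_t min_ind) {
        /* fb_ffs() returns NCLASSES when no set bit exists at or above min_ind. */
        return fb_ffs(nonempty, NCLASSES, min_ind);
}

This is the structure eset_first_fit() and psset_pick_alloc() use below, and it makes the "set bits correspond to non-empty heaps" comment actually true.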
--- include/jemalloc/internal/eset.h | 4 ++-- include/jemalloc/internal/psset.h | 3 ++- src/eset.c | 30 +++++++++++++----------------- src/psset.c | 11 ++++------- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index d260bc1..7b53ecd 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ESET_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" @@ -22,7 +22,7 @@ struct eset_s { atomic_zu_t nbytes[SC_NPSIZES + 1]; /* Bitmap for which set bits correspond to non-empty heaps. */ - bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; + fb_group_t bitmap[FB_NGROUPS(SC_NPSIZES + 1)]; /* LRU of all extents in heaps. */ edata_list_inactive_t lru; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index d2a8b24..2b6ea7b 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -56,7 +56,8 @@ struct psset_s { * free run of pages in a pageslab. */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; - bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* Bitmap for which set bits correspond to non-empty heaps. */ + fb_group_t bitmap[FB_NGROUPS(PSSET_NPSIZES)]; /* * The sum of all bin stats in stats. This lets us quickly answer * queries for the number of dirty, active, and retained pages in the diff --git a/src/eset.c b/src/eset.c index c9af80e..a52a6f7 100644 --- a/src/eset.c +++ b/src/eset.c @@ -3,15 +3,14 @@ #include "jemalloc/internal/eset.h" -const bitmap_info_t eset_bitmap_info = - BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); +#define ESET_NPSIZES (SC_NPSIZES + 1) void eset_init(eset_t *eset, extent_state_t state) { - for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { + for (unsigned i = 0; i < ESET_NPSIZES; i++) { edata_heap_new(&eset->heaps[i]); } - bitmap_init(eset->bitmap, &eset_bitmap_info, true); + fb_init(eset->bitmap, ESET_NPSIZES); edata_list_inactive_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; @@ -56,8 +55,7 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (edata_heap_empty(&eset->heaps[pind])) { - bitmap_unset(eset->bitmap, &eset_bitmap_info, - (size_t)pind); + fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_heap_insert(&eset->heaps[pind], edata); @@ -92,8 +90,7 @@ eset_remove(eset_t *eset, edata_t *edata) { } if (edata_heap_empty(&eset->heaps[pind])) { - bitmap_set(eset->bitmap, &eset_bitmap_info, - (size_t)pind); + fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; @@ -122,10 +119,10 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { + for (pszind_t i = + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < pind_max; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < SC_NPSIZES); assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); @@ 
-171,11 +168,10 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, edata_heap_first(&eset->heaps[pind]); } - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); - i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { + for (pszind_t i = + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < ESET_NPSIZES; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); assert(edata_size_get(edata) >= size); diff --git a/src/psset.c b/src/psset.c index 08c9b6c..a54e4b7 100644 --- a/src/psset.c +++ b/src/psset.c @@ -5,15 +5,12 @@ #include "jemalloc/internal/flat_bitmap.h" -static const bitmap_info_t psset_bitmap_info = - BITMAP_INFO_INITIALIZER(PSSET_NPSIZES); - void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { hpdata_age_heap_new(&psset->pageslabs[i]); } - bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + fb_init(psset->bitmap, PSSET_NPSIZES); memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); @@ -101,14 +98,14 @@ static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + fb_unset(psset->bitmap, PSSET_NPSIZES, (size_t)pind); } } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + fb_set(psset->bitmap, PSSET_NPSIZES, (size_t)pind); } hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } @@ -266,7 +263,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + pszind_t pind = (pszind_t)fb_ffs(psset->bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); -- cgit v0.12 From 6bddb92ad64ee096a34c0d099736c237d46f1065 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 11:26:56 -0800 Subject: psset: Rename "bitmap" to "pageslab_bitmap". It tracks pageslabs. Soon, we'll have another bitmap (to track dirty pages) that we want to disambiguate. While we're here, fix an out-of-date comment. --- include/jemalloc/internal/psset.h | 2 +- src/psset.c | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 2b6ea7b..271d144 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -57,7 +57,7 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ - fb_group_t bitmap[FB_NGROUPS(PSSET_NPSIZES)]; + fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; /* * The sum of all bin stats in stats. 
This lets us quickly answer * queries for the number of dirty, active, and retained pages in the diff --git a/src/psset.c b/src/psset.c index a54e4b7..66fd0c4 100644 --- a/src/psset.c +++ b/src/psset.c @@ -10,7 +10,7 @@ psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { hpdata_age_heap_new(&psset->pageslabs[i]); } - fb_init(psset->bitmap, PSSET_NPSIZES); + fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); @@ -40,14 +40,9 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { } /* - * The stats maintenance strategy is simple, but not necessarily obvious. - * edata_nfree and the bitmap must remain consistent at all times. If they - * change while an edata is within an edata_heap (or full), then the associated - * stats bin (or the full bin) must also change. If they change while not in a - * bin (say, in between extraction and reinsertion), then the bin stats need not - * change. If a pageslab is removed from a bin (or becomes nonfull), it should - * no longer contribute to that bin's stats (or the full stats). These help - * ensure we don't miss any heap modification operations. + * The stats maintenance strategy is to remove a pageslab's contribution to the + * stats when we call psset_update_begin, and re-add it (to a potentially new + * bin) when we call psset_update_end. */ JEMALLOC_ALWAYS_INLINE void psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, @@ -98,14 +93,14 @@ static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_unset(psset->bitmap, PSSET_NPSIZES, (size_t)pind); + fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); } } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_set(psset->bitmap, PSSET_NPSIZES, (size_t)pind); + fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); } hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } @@ -263,7 +258,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - pszind_t pind = (pszind_t)fb_ffs(psset->bitmap, PSSET_NPSIZES, + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); -- cgit v0.12 From 0f6c420f83a52c3927cc1c78d155622de05e3ba5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 5 Feb 2021 10:46:17 -0800 Subject: HPA: Make purging/hugifying more principled. Before this change, purge/hugify decisions had several sharp edges that could lead to pathological behavior if tuning parameters weren't carefully chosen. It's the first of a series; this introduces basic "make every hugepage with dirty pages purgeable" functionality, and the next commit expands that functionality to have a smarter policy for picking hugepages to purge. Previously, the dehugify logic would *never* dehugify a hugepage unless it was dirtier than the dehugification threshold. This can lead to situations in which these pages (which themselves could never be purged) would push us above the maximum allowed dirty pages in the shard. 
This forces immediate purging of any pages deallocated in non-hugified hugepages, which in turn places nonobvious practical limitations on the relationships between various config settings. Instead, we make our preference not to dehugify to purge a soft one rather than a hard one. We'll avoid purging them, but only so long as we can do so by purging non-hugified pages. If we need to purge them to satisfy our dirty page limits, or to hugify other, more worthy candidates, we'll still do so. --- include/jemalloc/internal/hpdata.h | 76 +++++++++++++++++++++------ include/jemalloc/internal/psset.h | 4 +- src/hpa.c | 105 ++++++++++++++++++++++++------------- src/hpdata.c | 4 +- src/psset.c | 69 +++++++++++++++++------- 5 files changed, 183 insertions(+), 75 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index e489e62..3bbb7cc 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -7,6 +7,42 @@ #include "jemalloc/internal/typed_list.h" /* + * How badly we want to purge some region of memory. This is a temporary + * definition; it gets deleted in the next commit (where we adopt a more + * explicit dirtiest-first policy that only considers hugification status). + */ +enum hpdata_purge_level_e { + /* + * The level number is important -- we use it as indices into an array + * of size 2 (one for each purge level). + */ + + /* "Regular" candidates for purging. */ + hpdata_purge_level_default = 0, + + /* + * Candidates for purging, but as a last resort. Practically, + * nonpreferred corresponds to hugified regions that are below the + * hugification threshold but have not yet reached the dehugification + * threshold, while strongly nonpreferred candidates are those which are + * above the hugification threshold. + */ + hpdata_purge_level_nonpreferred = 1, + hpdata_purge_level_strongly_nonpreferred = 2, + + /* Don't purge, no matter what. */ + hpdata_purge_level_never = 2, + + /* + * How big an array has to be to accomodate all purge levels. This + * relies on the fact that we don't actually keep unpurgable hpdatas in + * a container. + */ + hpdata_purge_level_count = hpdata_purge_level_never +}; +typedef enum hpdata_purge_level_e hpdata_purge_level_t; + +/* * The metadata representation we use for extents in hugepages. While the PAC * uses the edata_t to represent both active and inactive extents, the HP only * uses the edata_t for active ones; instead, inactive extent state is tracked @@ -52,8 +88,8 @@ struct hpdata_s { bool h_in_psset_alloc_container; /* The same, but with purging. */ - bool h_purge_allowed; - bool h_in_psset_purge_container; + uint8_t h_purge_level; + uint8_t h_purge_container_level; /* And with hugifying. 
*/ bool h_hugify_allowed; @@ -164,26 +200,26 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { hpdata->h_in_psset_alloc_container = in_container; } -static inline bool -hpdata_purge_allowed_get(const hpdata_t *hpdata) { - return hpdata->h_purge_allowed; +static inline hpdata_purge_level_t +hpdata_purge_level_get(const hpdata_t *hpdata) { + return (hpdata_purge_level_t)hpdata->h_purge_level; } static inline void -hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; +hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { + assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge); + hpdata->h_purge_level = (uint8_t)level; } -static inline bool -hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) { - return hpdata->h_in_psset_purge_container; +static inline hpdata_purge_level_t +hpdata_purge_container_level_get(const hpdata_t *hpdata) { + return (hpdata_purge_level_t)hpdata->h_purge_container_level; } static inline void -hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) { - assert(in_container != hpdata->h_in_psset_purge_container); - hpdata->h_in_psset_purge_container = in_container; +hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { + assert(level != hpdata->h_purge_container_level); + hpdata->h_purge_container_level = level; } static inline bool @@ -284,6 +320,11 @@ hpdata_ndirty_get(hpdata_t *hpdata) { return hpdata->h_ntouched - hpdata->h_nactive; } +static inline size_t +hpdata_nretained_get(hpdata_t *hpdata) { + return hpdata->h_nactive - hpdata->h_ntouched; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -316,11 +357,12 @@ hpdata_consistent(hpdata_t *hpdata) { return false; } if (hpdata_changing_state_get(hpdata) - && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + && ((hpdata->h_purge_level != hpdata_purge_level_never) + || hpdata->h_hugify_allowed)) { return false; } - if (hpdata_purge_allowed_get(hpdata) - != hpdata_in_psset_purge_container_get(hpdata)) { + if (hpdata_purge_level_get(hpdata) + != hpdata_purge_container_level_get(hpdata)) { return false; } if (hpdata_hugify_allowed_get(hpdata) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 271d144..285bf6d 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -70,8 +70,8 @@ struct psset_s { * allocations. */ hpdata_empty_list_t empty; - /* Slabs which are available to be purged. */ - hpdata_purge_list_t to_purge; + /* Slabs which are available to be purged, ordered by purge level. */ + hpdata_purge_list_t to_purge[hpdata_purge_level_count]; /* Slabs which are available to be hugified. 
*/ hpdata_hugify_list_t to_hugify; }; diff --git a/src/hpa.c b/src/hpa.c index d078f18..90fec35 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -151,34 +151,59 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { >= shard->opts.hugification_threshold; } -static bool -hpa_should_purge(hpa_shard_t *shard) { +static size_t +hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + return psset_ndirty(&shard->psset) - shard->npending_purge; +} + +static size_t +hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); if (shard->opts.dirty_mult == (fxp_t)-1) { - return false; + return (size_t)-1; } - size_t adjusted_ndirty = psset_ndirty(&shard->psset) - - shard->npending_purge; - /* - * Another simple static check; purge whenever dirty exceeds 25% of - * active. - */ - size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset), + return fxp_mul_frac(psset_nactive(&shard->psset), shard->opts.dirty_mult); - return adjusted_ndirty > max_ndirty; +} + +static bool +hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify == NULL) { + return false; + } + return hpa_adjusted_ndirty(tsdn, shard) + + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard); +} + +static bool +hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) { + return true; + } + if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) { + return true; + } + return false; } static void -hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { +hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, + hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { - hpdata_purge_allowed_set(ps, false); + hpdata_purge_level_set(ps, hpdata_purge_level_never); hpdata_hugify_allowed_set(ps, false); return; } /* - * Hugepages are distinctly costly to purge, so do it only if they're - * *particularly* full of dirty pages. Eventually, we should use a - * smarter / more dynamic heuristic for situations where we have to - * manually hugify. + * Hugepages are distinctly costly to purge, so try to avoid it unless + * they're *particularly* full of dirty pages. Eventually, we should + * use a smarter / more dynamic heuristic for situations where we have + * to manually hugify. * * In situations where we don't manually hugify, this problem is * reduced. The "bad" situation we're trying to avoid is one's that's @@ -195,17 +220,23 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { * deferred; in that case we don't need any explicit calls on the * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. - * - * Anyways, our strategy to delay dehugification is to only consider - * purging a hugified hugepage if it's individually dirtier than the - * overall max dirty pages setting. That setting is 1 dirty page per 4 - * active pages; i.e. 4/5s of hugepage pages must be active. 
*/ - if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) - || (hpdata_ndirty_get(ps) != 0 - && hpdata_ndirty_get(ps) * PAGE - >= shard->opts.dehugification_threshold)) { - hpdata_purge_allowed_set(ps, true); + if (hpdata_ndirty_get(ps) > 0) { + if (hpdata_huge_get(ps)) { + if (hpa_good_hugification_candidate(shard, ps)) { + hpdata_purge_level_set(ps, + hpdata_purge_level_strongly_nonpreferred); + } else if (hpdata_ndirty_get(ps) * PAGE + >= shard->opts.dehugification_threshold) { + hpdata_purge_level_set(ps, + hpdata_purge_level_nonpreferred); + } else { + hpdata_purge_level_set(ps, + hpdata_purge_level_default); + } + } else { + hpdata_purge_level_set(ps, hpdata_purge_level_default); + } } if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { @@ -286,7 +317,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { if (to_purge == NULL) { return false; } - assert(hpdata_purge_allowed_get(to_purge)); + assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never); assert(!hpdata_changing_state_get(to_purge)); /* @@ -297,7 +328,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); - hpdata_purge_allowed_set(to_purge, false); + hpdata_purge_level_set(to_purge, hpdata_purge_level_never); hpdata_hugify_allowed_set(to_purge, false); /* * Unlike with hugification (where concurrent @@ -352,7 +383,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_mid_purge_set(to_purge, false); hpdata_alloc_allowed_set(to_purge, true); - hpa_update_purge_hugify_eligibility(shard, to_purge); + hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge); psset_update_end(&shard->psset, to_purge); @@ -364,6 +395,10 @@ static bool hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) { + return false; + } + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); if (to_hugify == NULL) { return false; @@ -378,7 +413,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { */ psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); - hpdata_purge_allowed_set(to_hugify, false); + hpdata_purge_level_set(to_hugify, hpdata_purge_level_never); hpdata_hugify_allowed_set(to_hugify, false); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); @@ -401,7 +436,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); hpdata_mid_hugify_set(to_hugify, false); - hpa_update_purge_hugify_eligibility(shard, to_hugify); + hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify); psset_update_end(&shard->psset, to_hugify); return true; @@ -419,7 +454,7 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { hugified = hpa_try_hugify(tsdn, shard); purged = false; - if (hpa_should_purge(shard)) { + if (hpa_should_purge(tsdn, shard)) { purged = hpa_try_purge(tsdn, shard); } malloc_mutex_assert_owner(tsdn, &shard->mtx); @@ -491,7 +526,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, return NULL; } - hpa_update_purge_hugify_eligibility(shard, ps); + hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); return edata; } @@ -703,7 +738,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, 
unreserve_size); - hpa_update_purge_hugify_eligibility(shard, ps); + hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); hpa_do_deferred_work(tsdn, shard); } diff --git a/src/hpdata.c b/src/hpdata.c index 0fc7b7d..6aee4f6 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,8 +24,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_alloc_allowed = true; hpdata->h_in_psset_alloc_container = false; - hpdata->h_purge_allowed = false; - hpdata->h_in_psset_purge_container = false; + hpdata->h_purge_level = hpdata_purge_level_never; + hpdata->h_purge_container_level = hpdata_purge_level_never; hpdata->h_hugify_allowed = false; hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; diff --git a/src/psset.c b/src/psset.c index 66fd0c4..6de8260 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,7 +14,9 @@ psset_init(psset_t *psset) { memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); - hpdata_purge_list_init(&psset->to_purge); + for (int i = 0; i < hpdata_purge_level_count; i++) { + hpdata_purge_list_init(&psset->to_purge[i]); + } hpdata_hugify_list_init(&psset->to_hugify); } @@ -230,14 +232,31 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { psset_alloc_container_insert(psset, ps); } - if (hpdata_purge_allowed_get(ps) - && !hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, true); - hpdata_purge_list_append(&psset->to_purge, ps); - } else if (!hpdata_purge_allowed_get(ps) - && hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_purge_level_get(ps) == hpdata_purge_level_never + && hpdata_purge_container_level_get(ps) + != hpdata_purge_level_never) { + /* In some purge container, but shouldn't be in any. */ + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], + ps); + hpdata_purge_container_level_set(ps, hpdata_purge_level_never); + } else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never + && hpdata_purge_container_level_get(ps) + == hpdata_purge_level_never) { + /* Not in any purge container, but should be in one. */ + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); + } else if (hpdata_purge_level_get(ps) + != hpdata_purge_container_level_get(ps)) { + /* Should switch containers. 
*/ + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); } if (hpdata_hugify_allowed_get(ps) @@ -275,7 +294,13 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - return hpdata_purge_list_first(&psset->to_purge); + for (int i = 0; i < hpdata_purge_level_count; i++) { + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]); + if (ps != NULL) { + return ps; + } + } + return NULL; } hpdata_t * @@ -291,10 +316,15 @@ psset_insert(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - if (hpdata_purge_allowed_get(ps)) { - hpdata_in_psset_purge_container_set(ps, true); - hpdata_purge_list_append(&psset->to_purge, ps); + assert( + hpdata_purge_container_level_get(ps) == hpdata_purge_level_never); + if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) { + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); } + if (hpdata_hugify_allowed_get(ps)) { hpdata_in_psset_hugify_container_set(ps, true); hpdata_hugify_list_append(&psset->to_hugify, ps); @@ -309,12 +339,13 @@ psset_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_in_psset_alloc_container_get(ps)) { psset_alloc_container_remove(psset, ps); } - if (hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) { + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, hpdata_purge_level_never); } - if (hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, false); + hpdata_hugify_list_remove(&psset->to_hugify, ps); } } -- cgit v0.12 From 73ca4b8ef81d2a54970804182c010b8c95a93587 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 14:11:37 -0800 Subject: HPA: Use dirtiest-first purging. This seems to be practically beneficial, despite some pathological corner cases. --- include/jemalloc/internal/hpdata.h | 76 +++++-------------------- include/jemalloc/internal/psset.h | 12 +++- src/hpa.c | 26 ++------- src/hpdata.c | 3 +- src/psset.c | 113 +++++++++++++++++++++---------------- test/unit/psset.c | 86 +++++++++++++++++++++++++++- 6 files changed, 179 insertions(+), 137 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 3bbb7cc..245116b 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -7,42 +7,6 @@ #include "jemalloc/internal/typed_list.h" /* - * How badly we want to purge some region of memory. This is a temporary - * definition; it gets deleted in the next commit (where we adopt a more - * explicit dirtiest-first policy that only considers hugification status). - */ -enum hpdata_purge_level_e { - /* - * The level number is important -- we use it as indices into an array - * of size 2 (one for each purge level). - */ - - /* "Regular" candidates for purging. 
*/ - hpdata_purge_level_default = 0, - - /* - * Candidates for purging, but as a last resort. Practically, - * nonpreferred corresponds to hugified regions that are below the - * hugification threshold but have not yet reached the dehugification - * threshold, while strongly nonpreferred candidates are those which are - * above the hugification threshold. - */ - hpdata_purge_level_nonpreferred = 1, - hpdata_purge_level_strongly_nonpreferred = 2, - - /* Don't purge, no matter what. */ - hpdata_purge_level_never = 2, - - /* - * How big an array has to be to accomodate all purge levels. This - * relies on the fact that we don't actually keep unpurgable hpdatas in - * a container. - */ - hpdata_purge_level_count = hpdata_purge_level_never -}; -typedef enum hpdata_purge_level_e hpdata_purge_level_t; - -/* * The metadata representation we use for extents in hugepages. While the PAC * uses the edata_t to represent both active and inactive extents, the HP only * uses the edata_t for active ones; instead, inactive extent state is tracked @@ -87,9 +51,13 @@ struct hpdata_s { bool h_alloc_allowed; bool h_in_psset_alloc_container; - /* The same, but with purging. */ - uint8_t h_purge_level; - uint8_t h_purge_container_level; + /* + * The same, but with purging. There's no corresponding + * h_in_psset_purge_container, because the psset (currently) always + * removes hpdatas from their containers during updates (to implement + * LRU for purging). + */ + bool h_purge_allowed; /* And with hugifying. */ bool h_hugify_allowed; @@ -200,26 +168,15 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { hpdata->h_in_psset_alloc_container = in_container; } -static inline hpdata_purge_level_t -hpdata_purge_level_get(const hpdata_t *hpdata) { - return (hpdata_purge_level_t)hpdata->h_purge_level; -} - -static inline void -hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { - assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge); - hpdata->h_purge_level = (uint8_t)level; -} - -static inline hpdata_purge_level_t -hpdata_purge_container_level_get(const hpdata_t *hpdata) { - return (hpdata_purge_level_t)hpdata->h_purge_container_level; +static inline bool +hpdata_purge_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_purge_allowed; } static inline void -hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { - assert(level != hpdata->h_purge_container_level); - hpdata->h_purge_container_level = level; +hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -357,12 +314,7 @@ hpdata_consistent(hpdata_t *hpdata) { return false; } if (hpdata_changing_state_get(hpdata) - && ((hpdata->h_purge_level != hpdata_purge_level_never) - || hpdata->h_hugify_allowed)) { - return false; - } - if (hpdata_purge_level_get(hpdata) - != hpdata_purge_container_level_get(hpdata)) { + && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) { return false; } if (hpdata_hugify_allowed_get(hpdata) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 285bf6d..96fb300 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -20,6 +20,14 @@ */ #define PSSET_NPSIZES 64 +/* + * We keep two purge lists per page size class; one for hugified hpdatas (at + * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + + * 1). 
This lets us implement a preference for purging non-hugified hpdatas + * among similarly-dirty ones. + */ +#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES) + typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ @@ -71,7 +79,9 @@ struct psset_s { */ hpdata_empty_list_t empty; /* Slabs which are available to be purged, ordered by purge level. */ - hpdata_purge_list_t to_purge[hpdata_purge_level_count]; + hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS]; + /* Bitmap for which set bits correspond to non-empty purge lists. */ + fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)]; /* Slabs which are available to be hugified. */ hpdata_hugify_list_t to_hugify; }; diff --git a/src/hpa.c b/src/hpa.c index 90fec35..7d4fa1b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -195,7 +195,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { - hpdata_purge_level_set(ps, hpdata_purge_level_never); + hpdata_purge_allowed_set(ps, false); hpdata_hugify_allowed_set(ps, false); return; } @@ -221,23 +221,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. */ - if (hpdata_ndirty_get(ps) > 0) { - if (hpdata_huge_get(ps)) { - if (hpa_good_hugification_candidate(shard, ps)) { - hpdata_purge_level_set(ps, - hpdata_purge_level_strongly_nonpreferred); - } else if (hpdata_ndirty_get(ps) * PAGE - >= shard->opts.dehugification_threshold) { - hpdata_purge_level_set(ps, - hpdata_purge_level_nonpreferred); - } else { - hpdata_purge_level_set(ps, - hpdata_purge_level_default); - } - } else { - hpdata_purge_level_set(ps, hpdata_purge_level_default); - } - } + hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { hpdata_hugify_allowed_set(ps, true); @@ -317,7 +301,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { if (to_purge == NULL) { return false; } - assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never); + assert(hpdata_purge_allowed_get(to_purge)); assert(!hpdata_changing_state_get(to_purge)); /* @@ -328,7 +312,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); - hpdata_purge_level_set(to_purge, hpdata_purge_level_never); + hpdata_purge_allowed_set(to_purge, false); hpdata_hugify_allowed_set(to_purge, false); /* * Unlike with hugification (where concurrent @@ -413,7 +397,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { */ psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); - hpdata_purge_level_set(to_hugify, hpdata_purge_level_never); + hpdata_purge_allowed_set(to_hugify, false); hpdata_hugify_allowed_set(to_hugify, false); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); diff --git a/src/hpdata.c b/src/hpdata.c index 6aee4f6..b861e9e 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,8 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_alloc_allowed = true; hpdata->h_in_psset_alloc_container = false; - hpdata->h_purge_level = hpdata_purge_level_never; - hpdata->h_purge_container_level = hpdata_purge_level_never; + hpdata->h_purge_allowed 
= false; hpdata->h_hugify_allowed = false; hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; diff --git a/src/psset.c b/src/psset.c index 6de8260..c4053ef 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,9 +14,10 @@ psset_init(psset_t *psset) { memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); - for (int i = 0; i < hpdata_purge_level_count; i++) { + for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { hpdata_purge_list_init(&psset->to_purge[i]); } + fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS); hpdata_hugify_list_init(&psset->to_hugify); } @@ -195,6 +196,51 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { } } +static size_t +psset_purge_list_ind(hpdata_t *ps) { + size_t ndirty = hpdata_ndirty_get(ps); + /* Shouldn't have something with no dirty pages purgeable. */ + assert(ndirty > 0); + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); + /* + * Higher indices correspond to lists we'd like to purge earlier; + * increment the index for the nonhugified hpdatas first, so that we'll + * pick them before picking hugified ones. + */ + return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); +} + +static void +psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { + /* + * Remove the hpdata from its purge list (if it's in one). Even if it's + * going to stay in the same one, by appending it during + * psset_update_end, we move it to the end of its queue, so that we + * purge LRU within a given dirtiness bucket. + */ + if (hpdata_purge_allowed_get(ps)) { + size_t ind = psset_purge_list_ind(ps); + hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + hpdata_purge_list_remove(purge_list, ps); + if (hpdata_purge_list_empty(purge_list)) { + fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + } + } +} + +static void +psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { + if (hpdata_purge_allowed_get(ps)) { + size_t ind = psset_purge_list_ind(ps); + hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + if (hpdata_purge_list_empty(purge_list)) { + fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + } + hpdata_purge_list_append(purge_list, ps); + } + +} + void psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); @@ -210,10 +256,11 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { assert(hpdata_alloc_allowed_get(ps)); psset_alloc_container_remove(psset, ps); } + psset_maybe_remove_purge_list(psset, ps); /* - * We don't update presence in the purge list or hugify list; we try to - * keep those FIFO, even in the presence of other metadata updates. - * We'll update presence at the end of the metadata update if necessary. + * We don't update presence in the hugify list; we try to keep it FIFO, + * even in the presence of other metadata updates. We'll update + * presence at the end of the metadata update if necessary. */ } @@ -231,33 +278,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - - if (hpdata_purge_level_get(ps) == hpdata_purge_level_never - && hpdata_purge_container_level_get(ps) - != hpdata_purge_level_never) { - /* In some purge container, but shouldn't be in any. 
*/ - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], - ps); - hpdata_purge_container_level_set(ps, hpdata_purge_level_never); - } else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never - && hpdata_purge_container_level_get(ps) - == hpdata_purge_level_never) { - /* Not in any purge container, but should be in one. */ - hpdata_purge_list_append( - &psset->to_purge[hpdata_purge_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - } else if (hpdata_purge_level_get(ps) - != hpdata_purge_container_level_get(ps)) { - /* Should switch containers. */ - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); - hpdata_purge_list_append( - &psset->to_purge[hpdata_purge_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - } + psset_maybe_insert_purge_list(psset, ps); if (hpdata_hugify_allowed_get(ps) && !hpdata_in_psset_hugify_container_get(ps)) { @@ -294,13 +315,16 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - for (int i = 0; i < hpdata_purge_level_count; i++) { - hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]); - if (ps != NULL) { - return ps; - } + ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, + PSSET_NPURGE_LISTS - 1); + if (ind_ssz < 0) { + return NULL; } - return NULL; + pszind_t ind = (pszind_t)ind_ssz; + assert(ind < PSSET_NPSIZES); + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); + assert(ps != NULL); + return ps; } hpdata_t * @@ -316,14 +340,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - assert( - hpdata_purge_container_level_get(ps) == hpdata_purge_level_never); - if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) { - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - hpdata_purge_list_append( - &psset->to_purge[hpdata_purge_level_get(ps)], ps); - } + psset_maybe_insert_purge_list(psset, ps); if (hpdata_hugify_allowed_get(ps)) { hpdata_in_psset_hugify_container_set(ps, true); @@ -339,11 +356,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_in_psset_alloc_container_get(ps)) { psset_alloc_container_remove(psset, ps); } - if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) { - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, hpdata_purge_level_never); - } + psset_maybe_remove_purge_list(psset, ps); if (hpdata_in_psset_hugify_container_get(ps)) { hpdata_in_psset_hugify_container_set(ps, false); hpdata_hugify_list_remove(&psset->to_hugify, ps); diff --git a/test/unit/psset.c b/test/unit/psset.c index fdc28d3..fde403e 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -540,6 +540,89 @@ TEST_BEGIN(test_insert_remove) { } TEST_END +TEST_BEGIN(test_purge_prefers_nonhuge) { + /* + * All else being equal, we should prefer purging non-huge pages over + * huge ones. + */ + + /* Nothing magic about this constant. 
*/ + enum { + NHP = 23, + }; + hpdata_t *hpdata; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_huge[NHP]; + uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0]; + uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; + uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; + uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; + + for (size_t i = 0; i < NHP; i++) { + hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), + 123 + i); + psset_insert(&psset, &hpdata_huge[i]); + + hpdata_init(&hpdata_nonhuge[i], + (void *)((10 + NHP + i) * HUGEPAGE), + 456 + i); + psset_insert(&psset, &hpdata_nonhuge[i]); + + } + for (int i = 0; i < 2 * NHP; i++) { + hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4); + psset_update_begin(&psset, hpdata); + void *ptr; + ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE * 3 / 4); + /* Ignore the first alloc, which will stick around. */ + (void)ptr; + /* + * The second alloc is to dirty the pages; free it immediately + * after allocating. + */ + ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE / 4); + hpdata_unreserve(hpdata, ptr, HUGEPAGE / 4); + + if (huge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < huge_end) { + hpdata_hugify(hpdata); + } + + hpdata_purge_allowed_set(hpdata, true); + psset_update_end(&psset, hpdata); + } + + /* + * We've got a bunch of 1/8th dirty hpdatas. It should give us all the + * non-huge ones to purge, then all the huge ones, then refuse to purge + * further. + */ + for (int i = 0; i < NHP; i++) { + hpdata = psset_pick_purge(&psset); + assert_true(nonhuge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < nonhuge_end, ""); + psset_update_begin(&psset, hpdata); + test_psset_fake_purge(hpdata); + hpdata_purge_allowed_set(hpdata, false); + psset_update_end(&psset, hpdata); + } + for (int i = 0; i < NHP; i++) { + hpdata = psset_pick_purge(&psset); + expect_true(huge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < huge_end, ""); + psset_update_begin(&psset, hpdata); + hpdata_dehugify(hpdata); + test_psset_fake_purge(hpdata); + hpdata_purge_allowed_set(hpdata, false); + psset_update_end(&psset, hpdata); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -550,5 +633,6 @@ main(void) { test_multi_pageslab, test_stats, test_oldest_fit, - test_insert_remove); + test_insert_remove, + test_purge_prefers_nonhuge); } -- cgit v0.12 From 22be724af4438014245c0336ac7212fe97ad004b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 16:57:15 -0800 Subject: Set is_head in extent_alloc_wrapper w/ retain. When retain is on, when extent_grow_retained failed (e.g. due to split hook failures), we'll try extent_alloc_wrapper as the last resort. Set the is_head bit in that case to be consistent. The allocated extent in that case will be retained properly, but not merged with other extents. 
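
For orientation, the effect of the change below can be condensed into the following sketch. This is an illustration only, not the verbatim source; the merge predicate is a hypothetical condensation, while opt_retain, EXTENT_IS_HEAD, and EXTENT_NOT_HEAD are the identifiers actually touched by the patch.

    /*
     * Last-resort extents from extent_alloc_wrapper() are now tagged as
     * head extents whenever retain is enabled (mirrors the src/extent.c
     * hunk below).
     */
    extent_head_state_t head_state = opt_retain ? EXTENT_IS_HEAD
        : EXTENT_NOT_HEAD;

    /*
     * Hypothetical condensation of the head rule: merging an extent into
     * a successor that is a head is refused, which is what keeps these
     * extents retained but never coalesced with neighbors.
     */
    static bool
    merge_blocked_by_head(bool head_b) {
            return head_b;
    }
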
--- include/jemalloc/internal/extent.h | 2 -- src/extent.c | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f620736..f2fee5c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -28,8 +28,6 @@ void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index c41f17c..51711ef 100644 --- a/src/extent.c +++ b/src/extent.c @@ -45,6 +45,8 @@ static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); +static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); /******************************************************************************/ @@ -771,7 +773,7 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } -edata_t * +static edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -791,7 +793,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, EXTENT_PAI_PAC, - EXTENT_NOT_HEAD); + opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; -- cgit v0.12 From 11127240caefb579a213ad075ab4f52910f333e2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 22:26:12 -0800 Subject: Remove redundant enable-debug definition in configure. --- configure.ac | 3 --- 1 file changed, 3 deletions(-) diff --git a/configure.ac b/configure.ac index 34613fe..41a03d2 100644 --- a/configure.ac +++ b/configure.ac @@ -1203,9 +1203,6 @@ fi if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) fi -if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) -fi AC_SUBST([enable_debug]) dnl Only optimize if not debugging. -- cgit v0.12 From 3913077146350bd1b720a757e33e8aa35a34e58b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Mar 2021 11:27:00 -0800 Subject: Mark head state during dss alloc. Specifically, the extent_dalloc_gap relies on the correct head state to coalesce. --- src/extent_dss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 9857fd2..9a35bac 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -140,6 +140,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; } + bool head_state = opt_retain ? EXTENT_IS_HEAD : + EXTENT_NOT_HEAD; /* * Compute how much page-aligned gap space (if any) is * necessary to satisfy alignment. 
This space can be @@ -157,7 +159,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, SC_NSIZES, extent_sn_next( &arena->pa_shard.pac), extent_state_active, false, true, - EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, head_state); } /* * Compute the address just past the end of the desired @@ -206,7 +208,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, head_state); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); -- cgit v0.12 From 9193ea2248e6265d2e649e60e246491d414d254a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 25 Feb 2021 10:17:44 -0800 Subject: Cirrus: fix build. Remaining on 12.1 has started to break with an m4 error. Upgrading fixes things. Mangle public symbols to work around a public definition error. --- .cirrus.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index d01954f..30fe830 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,7 +5,7 @@ env: task: freebsd_instance: matrix: - image: freebsd-12-1-release-amd64 + image: freebsd-12-2-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y @@ -13,7 +13,10 @@ task: script: - autoconf #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS - - ./configure + # We don't perfectly track freebsd stdlib.h definitions. This is fine when + # we count as a system header, but breaks otherwise, like during these + # tests. + - ./configure --with-jemalloc-prefix=ci_ - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests -- cgit v0.12 From 61afb6a40572adfd7b9f03817ff0e62005110212 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 22:50:22 -0700 Subject: Fix locking on arena_i_destroy_ctl(). The ctl_mtx should be held to protect against concurrent arenas.create. 
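
To make the race concrete, here is a minimal user-level sketch of the two operations that now serialize on ctl_mtx. The helper names are hypothetical; the mallctl names are the public entry points behind the code paths named in this commit.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    /* Thread A: racing arena creation. */
    static void
    create_arena(unsigned *ind) {
            size_t sz = sizeof(*ind);
            mallctl("arenas.create", ind, &sz, NULL, 0);
    }

    /* Thread B: arena destruction; with this fix the destroy path also
     * holds ctl_mtx, so the two requests can no longer interleave. */
    static void
    destroy_arena(unsigned ind) {
            char cmd[64];
            snprintf(cmd, sizeof(cmd), "arena.%u.destroy", ind);
            mallctl(cmd, NULL, NULL, NULL, 0);
    }
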
--- src/ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 663cf86..c713f0e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2650,6 +2650,8 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *arena; ctl_arena_t *ctl_darena, *ctl_arena; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { @@ -2680,6 +2682,8 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(ret == 0); label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; } -- cgit v0.12 From 2ae1ef7dbd9aadfc80db9692004b5052fd3b36ea Mon Sep 17 00:00:00 2001 From: lirui Date: Fri, 26 Mar 2021 17:32:35 +0800 Subject: Fix doc large size 54 KiB error --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 018170c..fa53715 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -630,7 +630,7 @@ for (i = 0; i < nbins; i++) { 8 KiB - [40 KiB, 48 KiB, 54 KiB, 64 KiB] + [40 KiB, 48 KiB, 56 KiB, 64 KiB] 16 KiB -- cgit v0.12 From a137a6825253da928b49149a81f82e73ed0d7b75 Mon Sep 17 00:00:00 2001 From: Evers Chen Date: Tue, 30 Mar 2021 07:27:37 +0800 Subject: Remove redundant declaration, pac_retain_grow_limit_get_set was declared twice in pac.h --- include/jemalloc/internal/pac.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 6d4dfba..d07ccc2 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -121,8 +121,6 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); -bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit); void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, pac_estats_t *estats_out, size_t *resident); -- cgit v0.12 From 862219e461d642d860d2c9ddc122705b031b6d80 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 22:50:01 -0700 Subject: Add quiescence sync before deleting base during arena_destroy. --- src/arena.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index f054f09..78ea92c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -605,6 +605,90 @@ arena_reset(tsd_t *tsd, arena_t *arena) { pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } +static void +arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes, + unsigned n_mtx) { + for (unsigned i = 0; i < n_mtx; i++) { + malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]); + malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]); + } +} + +#define ARENA_DESTROY_MAX_DELAYED_MTX 32 +static void +arena_prepare_base_deletion_sync(tsd_t *tsd, malloc_mutex_t *mtx, + malloc_mutex_t **delayed_mtx, unsigned *n_delayed) { + if (!malloc_mutex_trylock(tsd_tsdn(tsd), mtx)) { + /* No contention. */ + malloc_mutex_unlock(tsd_tsdn(tsd), mtx); + return; + } + unsigned n = *n_delayed; + assert(n < ARENA_DESTROY_MAX_DELAYED_MTX); + /* Add another to the batch. 
*/ + delayed_mtx[n++] = mtx; + + if (n == ARENA_DESTROY_MAX_DELAYED_MTX) { + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n); + n = 0; + } + *n_delayed = n; +} + +static void +arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { + /* + * In order to coalesce, emap_try_acquire_edata_neighbor will attempt to + * check neighbor edata's state to determine eligibility. This means + * under certain conditions, the metadata from an arena can be accessed + * w/o holding any locks from that arena. In order to guarantee safe + * memory access, the metadata and the underlying base allocator needs + * to be kept alive, until all pending accesses are done. + * + * 1) with opt_retain, the arena boundary implies the is_head state + * (tracked in the rtree leaf), and the coalesce flow will stop at the + * head state branch. Therefore no cross arena metadata access + * possible. + * + * 2) w/o opt_retain, the arena id needs to be read from the edata_t, + * meaning read only cross-arena metadata access is possible. The + * coalesce attempt will stop at the arena_id mismatch, and is always + * under one of the ecache locks. To allow safe passthrough of such + * metadata accesses, the loop below will iterate through all manual + * arenas' ecache locks. As all the metadata from this base allocator + * have been unlinked from the rtree, after going through all the + * relevant ecache locks, it's safe to say that a) pending accesses are + * all finished, and b) no new access will be generated. + */ + if (opt_retain) { + return; + } + unsigned destroy_ind = base_ind_get(base_to_destroy); + assert(destroy_ind >= manual_arena_base); + + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX]; + unsigned n_delayed = 0, total = narenas_total_get(); + for (unsigned i = 0; i < total; i++) { + if (i == destroy_ind) { + continue; + } + arena_t *arena = arena_get(tsdn, i, false); + if (arena == NULL) { + continue; + } + pac_t *pac = &arena->pa_shard.pac; + arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx, + delayed_mtx, &n_delayed); + } + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); +} +#undef ARENA_DESTROY_MAX_DELAYED_MTX + void arena_destroy(tsd_t *tsd, arena_t *arena) { assert(base_ind_get(arena->base) >= narenas_auto); @@ -633,8 +717,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * Destroy the base allocator, which manages all metadata ever mapped by - * this arena. + * this arena. The prepare function will make sure no pending access to + * the metadata in this base anymore. */ + arena_prepare_base_deletion(tsd, arena->base); base_delete(tsd_tsdn(tsd), arena->base); } -- cgit v0.12 From 70d1541c5b60ffd3089d312f3e4e534c72738aaf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 26 Feb 2021 15:11:58 -0800 Subject: Track extent is_head state in rtree leaf. 
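
Since the commit message is terse: after this change the compact rtree leaf packs the new is_head flag into bit 1, next to the slab bit in bit 0, with szind above LG_VADDR. A simplified standalone sketch of rtree_leaf_elm_bits_encode()/_decode() from the hunks below (it ignores the sign-extension and aarch64 handling in the real code):

    /* slab lives in bit 0, the new is_head bit in bit 1, szind above
     * LG_VADDR; edata_t alignment keeps the pointer's low bits free. */
    static uintptr_t
    leaf_bits_encode(const edata_t *edata, szind_t szind, bool slab,
        bool is_head) {
            uintptr_t edata_bits = (uintptr_t)edata
                & (((uintptr_t)1 << LG_VADDR) - 1);
            return edata_bits | ((uintptr_t)szind << LG_VADDR)
                | ((uintptr_t)is_head << 1) | (uintptr_t)slab;
    }

    static void
    leaf_bits_decode(uintptr_t bits, bool *slab, bool *is_head,
        szind_t *szind) {
            *slab = (bool)(bits & 1);
            *is_head = (bool)(bits & ((uintptr_t)1 << 1));
            *szind = (szind_t)(bits >> LG_VADDR);
    }
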
--- include/jemalloc/internal/edata.h | 2 +- include/jemalloc/internal/rtree.h | 34 +++++++++++++++++++++++----------- src/ehooks.c | 4 ++++ src/emap.c | 21 +++++++++++++++------ src/extent.c | 2 ++ test/unit/rtree.c | 10 ++++++++-- 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index c71209e..e75866b 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,7 @@ typedef enum extent_state_e extent_state_t; enum extent_head_state_e { EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ + EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ }; typedef enum extent_head_state_e extent_head_state_t; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 83dfdc8..3b7972e 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -46,6 +46,7 @@ struct rtree_node_elm_s { typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { szind_t szind; + bool is_head; /* Mirrors edata->is_head. */ bool slab; }; @@ -65,9 +66,10 @@ struct rtree_leaf_elm_s { * * x: index * e: edata + * h: is_head * b: slab * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00hb */ atomic_p_t le_bits; #else @@ -184,12 +186,16 @@ rtree_leaf_elm_bits_encode(rtree_contents_t contents) { & (((uintptr_t)1 << LG_VADDR) - 1); uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; /* - * Slab shares the low bit of edata; we know edata is on an even address - * (in fact, it's 128 bytes on 64-bit systems; we can enforce this - * alignment if we want to steal 6 extra rtree leaf bits someday. + * Metadata shares the low bits of edata. edata is CACHELINE aligned (in + * fact, it's 128 bytes on 64-bit systems); we can enforce this + * alignment if we want to steal the extra rtree leaf bits someday. */ uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; - return szind_bits | edata_bits | slab_bits; + uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; + uintptr_t metadata_bits = szind_bits | is_head_bits | slab_bits; + assert((edata_bits & metadata_bits) == 0); + + return edata_bits | metadata_bits; } JEMALLOC_ALWAYS_INLINE rtree_contents_t @@ -198,20 +204,23 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { /* Do the easy things first. */ contents.metadata.szind = bits >> LG_VADDR; contents.metadata.slab = (bool)(bits & 1); + contents.metadata.is_head = (bool)(bits & (1 << 1)); + + uintptr_t metadata_mask = ~((uintptr_t)((1 << 2) - 1)); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set * the higher ones. Instead, the high bits get zeroed. */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; - /* Mask off the slab bit. */ - uintptr_t low_bit_mask = ~(uintptr_t)1; + /* Mask off metadata. */ + uintptr_t low_bit_mask = metadata_mask; uintptr_t mask = high_bit_mask & low_bit_mask; contents.edata = (edata_t *)(bits & mask); # else - /* Restore sign-extended high bits, mask slab bit. */ + /* Restore sign-extended high bits, mask metadata bits. 
*/ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & ~((uintptr_t)0x1)); + >> RTREE_NHIB) & metadata_mask); # endif return contents; } @@ -230,7 +239,8 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); - contents.metadata.szind = (metadata_bits >> 1); + contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); + contents.metadata.szind = (metadata_bits >> 2); contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -247,7 +257,8 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else unsigned metadata_bits = ((unsigned)contents.metadata.slab - | ((unsigned)contents.metadata.szind << 1)); + | ((unsigned)contents.metadata.is_head << 1) + | ((unsigned)contents.metadata.szind << 2)); atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid @@ -418,6 +429,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, contents.edata = NULL; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/ehooks.c b/src/ehooks.c index f2525e1..e1815ee 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -227,10 +227,14 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); bool head_a = edata_is_head_get(a); edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); + emap_assert_mapped(tsdn, &arena_emap_global, a); + emap_assert_mapped(tsdn, &arena_emap_global, b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/emap.c b/src/emap.c index 537f588..62abf4d 100644 --- a/src/emap.c +++ b/src/emap.c @@ -141,6 +141,8 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; + contents.metadata.is_head = (edata == NULL) ? false : + edata_is_head_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -169,12 +171,14 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, assert(edata_slab_get(edata)); + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = true; + contents.metadata.is_head = false; /* Not allowed to access. */ + /* Register interior. 
*/ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_contents_t contents; - contents.edata = edata; - contents.metadata.szind = szind; - contents.metadata.slab = true; rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE), contents); @@ -214,6 +218,8 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; + contents.metadata.is_head = edata_is_head_get(edata); + rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), contents); /* @@ -297,6 +303,7 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.edata = NULL; clear_contents.metadata.szind = SC_NSIZES; clear_contents.metadata.slab = false; + clear_contents.metadata.is_head = false; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, @@ -320,8 +327,10 @@ void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)).edata == edata); + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)); + assert(contents.edata == edata); + assert(contents.metadata.is_head == edata_is_head_get(edata)); } void diff --git a/src/extent.c b/src/extent.c index 51711ef..a541e7b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1254,6 +1254,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); + emap_assert_mapped(tsdn, pac->emap, a); + emap_assert_mapped(tsdn, pac->emap, b); bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 775bc19..a547f18 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -55,6 +55,7 @@ TEST_BEGIN(test_rtree_extrema) { contents_a.edata = &edata_a; contents_a.metadata.szind = edata_szind_get(&edata_a); contents_a.metadata.slab = edata_slab_get(&edata_a); + contents_a.metadata.is_head = edata_is_head_get(&edata_a); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), @@ -63,20 +64,23 @@ TEST_BEGIN(test_rtree_extrema) { PAGE); expect_true(contents_a.edata == read_contents_a.edata && contents_a.metadata.szind == read_contents_a.metadata.szind - && contents_a.metadata.slab == read_contents_a.metadata.slab, + && contents_a.metadata.slab == read_contents_a.metadata.slab + && contents_a.metadata.is_head == read_contents_a.metadata.is_head, "rtree_read() should return previously set value"); rtree_contents_t contents_b; contents_b.edata = &edata_b; contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); contents_b.metadata.slab = edata_slab_get(&edata_b); + contents_b.metadata.is_head = edata_is_head_get(&edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), "Unexpected rtree_write() failure"); rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0)); assert_true(contents_b.edata == read_contents_b.edata && contents_b.metadata.szind == read_contents_b.metadata.szind - && contents_b.metadata.slab == read_contents_b.metadata.slab, + && 
contents_b.metadata.slab == read_contents_b.metadata.slab + && contents_b.metadata.is_head == read_contents_b.metadata.is_head, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -106,6 +110,7 @@ TEST_BEGIN(test_rtree_bits) { contents.edata = &edata; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), "Unexpected rtree_write() failure"); @@ -158,6 +163,7 @@ TEST_BEGIN(test_rtree_random) { contents.edata = &edata; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, &edata, -- cgit v0.12 From 4d8c22f9a57fb29d39394e2382628854542d1520 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 26 Feb 2021 15:32:41 -0800 Subject: Store edata->state in rtree leaf and make edata_t 128B aligned. Verified that this doesn't result in any real increase of edata_t bytes allocated. --- include/jemalloc/internal/edata.h | 10 +++++-- include/jemalloc/internal/emap.h | 30 +++++++++++++++++++ include/jemalloc/internal/rtree.h | 46 +++++++++++++++++++---------- src/base.c | 2 +- src/emap.c | 6 ++++ src/extent.c | 36 +++++++++++++---------- src/hpa_central.c | 7 +++-- test/unit/rtree.c | 61 ++++++++++++++++++++++++--------------- 8 files changed, 138 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e75866b..648b478 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -13,6 +13,12 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +/* + * sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment + * to free up the low bits in the rtree leaf. + */ +#define EDATA_ALIGNMENT 128 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -88,7 +94,7 @@ struct edata_s { * f: nfree * s: bin_shard * - * 00000000 ... 000000ss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa + * 00000000 ... 00000sss sssfffff fffffiii iiiiittt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. 
@@ -143,7 +149,7 @@ struct edata_s { #define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_STATE_WIDTH 2 +#define EDATA_BITS_STATE_WIDTH 3 #define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index ac0050b..3e39748 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -136,6 +136,36 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +static inline void +emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, + uintptr_t addr, extent_state_t state) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + addr, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true); + assert(contents.edata == expected_edata); + contents.metadata.state = state; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); +} + +static inline void +emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state) { + /* Only emap is allowed to modify the edata internal state. */ + edata_state_set(edata, state); + + emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, + (uintptr_t)edata_base_get(edata), state); + emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, + (uintptr_t)edata_last_get(edata), state); + + emap_assert_mapped(tsdn, emap, edata); +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3b7972e..89c08cb 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -46,6 +46,7 @@ struct rtree_node_elm_s { typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { szind_t szind; + extent_state_t state; /* Mirrors edata->state. */ bool is_head; /* Mirrors edata->is_head. */ bool slab; }; @@ -56,6 +57,10 @@ struct rtree_contents_s { rtree_metadata_t metadata; }; +#define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH +#define RTREE_LEAF_STATE_SHIFT 2 +#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -66,17 +71,17 @@ struct rtree_leaf_elm_s { * * x: index * e: edata + * s: state * h: is_head * b: slab * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00hb + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb */ atomic_p_t le_bits; #else atomic_p_t le_edata; /* (edata_t *) */ /* - * slab is stored in the low bit; szind is stored in the next lowest - * bits. + * From low to high bits: slab, is_head, state. */ atomic_u_t le_metadata; #endif @@ -184,15 +189,14 @@ JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); + uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; - /* - * Metadata shares the low bits of edata. 
edata is CACHELINE aligned (in - * fact, it's 128 bytes on 64-bit systems); we can enforce this - * alignment if we want to steal the extra rtree leaf bits someday. - */ uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; - uintptr_t metadata_bits = szind_bits | is_head_bits | slab_bits; + uintptr_t state_bits = (uintptr_t)contents.metadata.state << + RTREE_LEAF_STATE_SHIFT; + uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits | + slab_bits; assert((edata_bits & metadata_bits) == 0); return edata_bits | metadata_bits; @@ -206,7 +210,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.metadata.slab = (bool)(bits & 1); contents.metadata.is_head = (bool)(bits & (1 << 1)); - uintptr_t metadata_mask = ~((uintptr_t)((1 << 2) - 1)); + uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> + RTREE_LEAF_STATE_SHIFT; + contents.metadata.state = (extent_state_t)state_bits; + + uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -214,13 +222,12 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off metadata. */ - uintptr_t low_bit_mask = metadata_mask; uintptr_t mask = high_bit_mask & low_bit_mask; contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask metadata bits. */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & metadata_mask); + >> RTREE_NHIB) & low_bit_mask); # endif return contents; } @@ -240,7 +247,12 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); - contents.metadata.szind = (metadata_bits >> 2); + + uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> + RTREE_LEAF_STATE_SHIFT; + contents.metadata.state = (extent_state_t)state_bits; + contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + + RTREE_LEAF_STATE_WIDTH); contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -252,13 +264,16 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_contents_t contents) { + assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - unsigned metadata_bits = ((unsigned)contents.metadata.slab + unsigned metadata_bits = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) - | ((unsigned)contents.metadata.szind << 2)); + | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) + | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + + RTREE_LEAF_STATE_WIDTH)); atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid @@ -430,6 +445,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = (extent_state_t)0; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/base.c b/src/base.c index 00440f4..9d4ce5c 100644 --- a/src/base.c +++ b/src/base.c @@ -472,7 +472,7 @@ edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { size_t esn; edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), - CACHELINE, &esn); + EDATA_ALIGNMENT, &esn); if (edata == NULL) { return NULL; } diff --git a/src/emap.c b/src/emap.c index 62abf4d..4f3915b 100644 --- a/src/emap.c +++ b/src/emap.c @@ -143,6 +143,7 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.metadata.slab = slab; contents.metadata.is_head = (edata == NULL) ? false : edata_is_head_get(edata); + contents.metadata.state = (edata == NULL) ? 0 : edata_state_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -170,11 +171,13 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); + assert(edata_state_get(edata) == extent_state_active); rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = true; + contents.metadata.state = extent_state_active; contents.metadata.is_head = false; /* Not allowed to access. */ /* Register interior. 
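Only the pages strictly between the first and last page of the slab are written here; the two boundary pages are handled by emap_register_boundary().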
*/ @@ -219,6 +222,7 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, contents.metadata.szind = szind; contents.metadata.slab = slab; contents.metadata.is_head = edata_is_head_get(edata); + contents.metadata.state = edata_state_get(edata); rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), contents); @@ -304,6 +308,7 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.metadata.szind = SC_NSIZES; clear_contents.metadata.slab = false; clear_contents.metadata.is_head = false; + clear_contents.metadata.state = (extent_state_t)0; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, @@ -331,6 +336,7 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { (uintptr_t)edata_base_get(edata)); assert(contents.edata == edata); assert(contents.metadata.is_head == edata_is_head_get(edata)); + assert(contents.metadata.state == edata_state_get(edata)); } void diff --git a/src/extent.c b/src/extent.c index a541e7b..56ea33f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -64,11 +64,12 @@ extent_may_force_decay(pac_t *pac) { static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); - edata_state_set(edata, ecache->state); + emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { return true; @@ -182,7 +183,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, not_reached(); case extent_state_dirty: case extent_state_muzzy: - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, + extent_state_active); break; case extent_state_retained: extent_deregister(tsdn, pac, edata); @@ -223,28 +225,30 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, ecache->state); + emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); eset_insert(&ecache->eset, edata); } static void -extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_deactivate(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, ecache, edata); + extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); } static void @@ -421,7 +425,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return NULL; } - extent_activate_locked(tsdn, ecache, edata); + extent_activate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); return 
edata; @@ -527,16 +531,16 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * leaking the extent. */ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, ecache, to_leak); + extent_deactivate(tsdn, pac, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, ecache, lead); + extent_deactivate(tsdn, pac, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, ecache, trail); + extent_deactivate(tsdn, pac, ecache, trail); } return edata; } else { @@ -837,7 +841,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); - extent_activate_locked(tsdn, ecache, outer); + extent_activate_locked(tsdn, pac, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, @@ -845,7 +849,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, ecache, outer); + extent_deactivate_locked(tsdn, pac, ecache, outer); } return err; @@ -1008,7 +1012,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return; } } - extent_deactivate_locked(tsdn, ecache, edata); + extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } diff --git a/src/hpa_central.c b/src/hpa_central.c index 346d942..36758a0 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -91,7 +91,8 @@ label_success: */ assert(edata_state_get(edata) == extent_state_dirty); assert(edata_base_get(edata) == edata_addr_get(edata)); - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, central->emap, edata, + extent_state_active); return edata; } @@ -136,7 +137,7 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, edata_sn_set(edata, sn); edata_sn_set(trail, sn); - edata_state_set(trail, extent_state_dirty); + emap_edata_state_update(tsdn, central->emap, trail, extent_state_dirty); eset_insert(¢ral->eset, trail); return false; } @@ -203,6 +204,6 @@ hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { eset_remove(¢ral->eset, trail); hpa_central_dalloc_merge(tsdn, central, edata, trail); } - edata_state_set(edata, extent_state_dirty); + emap_edata_state_update(tsdn, central->emap, edata, extent_state_dirty); eset_insert(¢ral->eset, edata); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index a547f18..9251652 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -32,12 +32,22 @@ TEST_END #undef NITERS #undef SEED +static edata_t * +alloc_edata(void) { + void *ret = mallocx(sizeof(edata_t), MALLOCX_ALIGN(EDATA_ALIGNMENT)); + assert_ptr_not_null(ret, "Unexpected mallocx() failure"); + + return ret; +} + TEST_BEGIN(test_rtree_extrema) { - edata_t edata_a = {0}, edata_b = {0}; - edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + edata_t *edata_a, *edata_b; + edata_a = alloc_edata(); + edata_b = alloc_edata(); + edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_init(edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); tsdn_t *tsdn = 
tsdn_fetch(); @@ -52,10 +62,11 @@ TEST_BEGIN(test_rtree_extrema) { "Unexpected rtree_new() failure"); rtree_contents_t contents_a; - contents_a.edata = &edata_a; - contents_a.metadata.szind = edata_szind_get(&edata_a); - contents_a.metadata.slab = edata_slab_get(&edata_a); - contents_a.metadata.is_head = edata_is_head_get(&edata_a); + contents_a.edata = edata_a; + contents_a.metadata.szind = edata_szind_get(edata_a); + contents_a.metadata.slab = edata_slab_get(edata_a); + contents_a.metadata.is_head = edata_is_head_get(edata_a); + contents_a.metadata.state = edata_state_get(edata_a); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), @@ -65,14 +76,16 @@ TEST_BEGIN(test_rtree_extrema) { expect_true(contents_a.edata == read_contents_a.edata && contents_a.metadata.szind == read_contents_a.metadata.szind && contents_a.metadata.slab == read_contents_a.metadata.slab - && contents_a.metadata.is_head == read_contents_a.metadata.is_head, + && contents_a.metadata.is_head == read_contents_a.metadata.is_head + && contents_a.metadata.state == read_contents_a.metadata.state, "rtree_read() should return previously set value"); rtree_contents_t contents_b; - contents_b.edata = &edata_b; - contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); - contents_b.metadata.slab = edata_slab_get(&edata_b); - contents_b.metadata.is_head = edata_is_head_get(&edata_b); + contents_b.edata = edata_b; + contents_b.metadata.szind = edata_szind_get_maybe_invalid(edata_b); + contents_b.metadata.slab = edata_slab_get(edata_b); + contents_b.metadata.is_head = edata_is_head_get(edata_b); + contents_b.metadata.state = edata_state_get(edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), "Unexpected rtree_write() failure"); rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, @@ -80,7 +93,8 @@ TEST_BEGIN(test_rtree_extrema) { assert_true(contents_b.edata == read_contents_b.edata && contents_b.metadata.szind == read_contents_b.metadata.szind && contents_b.metadata.slab == read_contents_b.metadata.slab - && contents_b.metadata.is_head == read_contents_b.metadata.is_head, + && contents_b.metadata.is_head == read_contents_b.metadata.is_head + && contents_b.metadata.state == read_contents_b.metadata.state, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -94,9 +108,8 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - - edata_t edata = {0}; - edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t *edata_c = alloc_edata(); + edata_init(edata_c, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -107,16 +120,17 @@ TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { rtree_contents_t contents; - contents.edata = &edata; + contents.edata = edata_c; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = extent_state_active; expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[j]).edata, &edata, + keys[j]).edata, edata_c, "rtree_edata_read() should 
return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, @@ -146,8 +160,8 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - edata_t edata = {0}; - edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t *edata_d = alloc_edata(); + edata_init(edata_d, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), @@ -160,18 +174,19 @@ TEST_BEGIN(test_rtree_random) { expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_contents_t contents; - contents.edata = &edata; + contents.edata = edata_d; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = edata_state_get(edata_d); rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, &edata, + keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, &edata, + keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value, " "i=%u", i); } -- cgit v0.12 From 9ea235f8feffc5f486f290b49a5a6752adbe70bf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 2 Mar 2021 17:26:26 -0800 Subject: Add witness_assert_positive_depth_to_rank(). --- include/jemalloc/internal/emap.h | 3 +++ include/jemalloc/internal/witness.h | 54 ++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3e39748..afb4983 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -139,6 +139,9 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { static inline void emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, uintptr_t addr, extent_state_t state) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); + rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 66dcf66..4cebb6e 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -243,26 +243,13 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, } } -static inline void -witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, - witness_rank_t rank_inclusive, unsigned depth) { - witness_tsd_t *witness_tsd; - unsigned d; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) { - return; - } +/* Returns depth. Not intended for direct use. 
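Counts the witnesses currently held whose rank is at least rank_inclusive, walking backward from the most recently acquired lock.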
*/ +static inline unsigned +witness_depth_to_rank(witness_list_t *witnesses, witness_rank_t rank_inclusive) +{ + unsigned d = 0; + witness_t *w = ql_last(witnesses, link); - if (witness_tsdn_null(witness_tsdn)) { - return; - } - witness_tsd = witness_tsdn_tsd(witness_tsdn); - - d = 0; - witnesses = &witness_tsd->witnesses; - w = ql_last(witnesses, link); if (w != NULL) { ql_reverse_foreach(w, witnesses, link) { if (w->rank < rank_inclusive) { @@ -271,6 +258,20 @@ witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, d++; } } + + return d; +} + +static inline void +witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive, unsigned depth) { + if (!config_debug || witness_tsdn_null(witness_tsdn)) { + return; + } + + witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + if (d != depth) { witness_depth_error(witnesses, rank_inclusive, depth); } @@ -287,6 +288,21 @@ witness_assert_lockless(witness_tsdn_t *witness_tsdn) { } static inline void +witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive) { + if (!config_debug || witness_tsdn_null(witness_tsdn)) { + return; + } + + witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + + if (d == 0) { + witness_depth_error(witnesses, rank_inclusive, 1); + } +} + +static inline void witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { witness_tsd_t *witness_tsd; witness_list_t *witnesses; -- cgit v0.12 From 1784939688b86e459ecb39615e463176dd609685 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Mar 2021 14:33:40 -0800 Subject: Use rtree tracked states to protect edata outside of ecache locks. This avoids the addr-based mutexes (i.e. the mutex_pool), and instead relies on the metadata tracked in rtree leaf: the head state and extent_state. Before trying to access the neighbor edata (e.g. for coalescing), the states will be verified first -- only neighbor edatas from the same arena and with the same state will be accessed. --- include/jemalloc/internal/edata.h | 11 +- include/jemalloc/internal/emap.h | 123 +++++++++++-------- include/jemalloc/internal/rtree.h | 31 ++++- src/emap.c | 241 +++++++++++++++++++++++++++----------- src/eset.c | 3 +- src/extent.c | 151 +++++++----------------- src/hpa_central.c | 65 ++++------ 7 files changed, 357 insertions(+), 268 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 648b478..b2e6ee9 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,11 @@ enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, extent_state_muzzy = 2, - extent_state_retained = 3 + extent_state_retained = 3, + extent_state_transition = 4, /* States below are intermediate. */ + extent_state_updating = 4, + extent_state_merging = 5, + extent_state_max = 5 /* Sanity checking only. 
*/ }; typedef enum extent_state_e extent_state_t; @@ -550,6 +554,11 @@ edata_is_head_set(edata_t *edata, bool is_head) { ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } +static inline bool +edata_state_in_transition(extent_state_t state) { + return state >= extent_state_transition; +} + /* * Because this function is implemented as a sequence of bitfield modifications, * even though each individual bit is properly initialized, we technically read diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index afb4983..239f3e4 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -5,6 +5,15 @@ #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" +/* + * Note: Ends without at semicolon, so that + * EMAP_DECLARE_RTREE_CTX; + * in uses will avoid empty-statement warnings. + */ +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) + typedef struct emap_s emap_t; struct emap_s { rtree_t rtree; @@ -31,20 +40,16 @@ bool emap_init(emap_t *emap, base_t *base, bool zeroed); void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); -/* - * Grab the lock or locks associated with the edata or edatas indicated (which - * is done just by simple address hashing). The hashing strategy means that - * it's never safe to grab locks incrementally -- you have to grab all the locks - * you'll need at once, and release them all at once. - */ -void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); -void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); -void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2); -void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2); -edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, - bool inactive_only); +void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state); + +edata_t *emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, + extent_state_t expected_state, bool allow_head_extent); +edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state, + bool forward); +void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state); /* * Associate the given edata with its beginning and end address, setting the @@ -136,43 +141,66 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -static inline void -emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, - uintptr_t addr, extent_state_t state) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - addr, /* dependent */ true, /* init_missing */ false); - assert(elm != NULL); - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true); - assert(contents.edata == expected_edata); - contents.metadata.state = state; - rtree_leaf_elm_write(tsdn, rtree, elm, contents); +JEMALLOC_ALWAYS_INLINE bool +emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(config_debug); + emap_assert_mapped(tsdn, emap, edata); + + EMAP_DECLARE_RTREE_CTX; + rtree_contents_t contents = 
rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)); + + return edata_state_in_transition(contents.metadata.state); } -static inline void -emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state) { - /* Only emap is allowed to modify the edata internal state. */ - edata_state_set(edata, state); +JEMALLOC_ALWAYS_INLINE bool +emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (!config_debug) { + /* For assertions only. */ + return false; + } - emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, - (uintptr_t)edata_base_get(edata), state); - emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, - (uintptr_t)edata_last_get(edata), state); + /* + * The edata is considered acquired if no other threads will attempt to + * read / write any fields from it. This includes a few cases: + * + * 1) edata not hooked into emap yet -- This implies the edata just got + * allocated or initialized. + * + * 2) in an active or transition state -- In both cases, the edata can + * be discovered from the emap, however the state tracked in the rtree + * will prevent other threads from accessing the actual edata. + */ + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + /* init_missing */ false); + if (elm == NULL) { + return true; + } + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true); + if (contents.edata == NULL || + contents.metadata.state == extent_state_active || + edata_state_in_transition(contents.metadata.state)) { + return true; + } - emap_assert_mapped(tsdn, emap, edata); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { + assert(edata_arena_ind_get(inner) == edata_arena_ind_get(outer)); + assert(edata_pai_get(inner) == edata_pai_get(outer)); + assert(edata_committed_get(inner) == edata_committed_get(outer)); + assert(edata_state_get(inner) == extent_state_active); + assert(edata_state_get(outer) == extent_state_merging); } JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } @@ -181,8 +209,7 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); @@ -194,8 +221,7 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, JEMALLOC_ALWAYS_INLINE void emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); @@ -212,8 +238,7 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, JEMALLOC_ALWAYS_INLINE bool emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, 
emap_full_alloc_ctx_t *full_alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents; bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 89c08cb..42aa11c 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -81,7 +81,7 @@ struct rtree_leaf_elm_s { #else atomic_p_t le_edata; /* (edata_t *) */ /* - * From low to high bits: slab, is_head, state. + * From high to low bits: szind (8 bits), state (4 bits), is_head, slab */ atomic_u_t le_metadata; #endif @@ -187,6 +187,7 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leaf_elm_bits_encode(rtree_contents_t contents) { + assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); @@ -212,6 +213,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> RTREE_LEAF_STATE_SHIFT; + assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); @@ -229,6 +231,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & low_bit_mask); # endif + assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); return contents; } @@ -250,6 +253,7 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> RTREE_LEAF_STATE_SHIFT; + assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH); @@ -283,6 +287,31 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, #endif } +/* The state field can be updated independently (and more frequently). */ +static inline void +rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) { + assert(elm1 != NULL); +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm1, + /* dependent */ true); + bits &= ~RTREE_LEAF_STATE_MASK; + bits |= state << RTREE_LEAF_STATE_SHIFT; + atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE); + if (elm2 != NULL) { + atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE); + } +#else + unsigned bits = atomic_load_u(&elm1->le_metadata, ATOMIC_RELAXED); + bits &= ~RTREE_LEAF_STATE_MASK; + bits |= state << RTREE_LEAF_STATE_SHIFT; + atomic_store_u(&elm1->le_metadata, bits, ATOMIC_RELEASE); + if (elm2 != NULL) { + atomic_store_u(&elm2->le_metadata, bits, ATOMIC_RELEASE); + } +#endif +} + /* * Tries to look up the key in the L1 cache, returning it if there's a hit, or * NULL if there's a miss. diff --git a/src/emap.c b/src/emap.c index 4f3915b..26a079c 100644 --- a/src/emap.c +++ b/src/emap.c @@ -3,15 +3,6 @@ #include "jemalloc/internal/emap.h" -/* - * Note: Ends without at semicolon, so that - * EMAP_DECLARE_RTREE_CTX; - * in uses will avoid empty-statement warnings. 
- */ -#define EMAP_DECLARE_RTREE_CTX \ - rtree_ctx_t rtree_ctx_fallback; \ - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) - enum emap_lock_result_e { emap_lock_result_success, emap_lock_result_failure, @@ -35,82 +26,186 @@ emap_init(emap_t *emap, base_t *base, bool zeroed) { } void -emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(edata != NULL); - mutex_pool_lock(tsdn, &emap->mtx_pool, (uintptr_t)edata); -} +emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); -void -emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(edata != NULL); - mutex_pool_unlock(tsdn, &emap->mtx_pool, (uintptr_t)edata); -} + edata_state_set(edata, state); -void -emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_lock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, - (uintptr_t)edata2); + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm1 = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + /* init_missing */ false); + assert(elm1 != NULL); + rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL : + rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), /* dependent */ true, + /* init_missing */ false); + + rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state); + + emap_assert_mapped(tsdn, emap, edata); } -void -emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_unlock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, - (uintptr_t)edata2); +static inline bool +edata_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. + */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; } -static inline emap_lock_result_t -emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, - edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true).edata; - - /* Slab implies active extents and should be skipped. */ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ true).metadata.slab)) { - return emap_lock_result_no_extent; +static inline bool +edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + edata_t *neighbor = contents.edata; + if (neighbor == NULL) { + return false; + } + /* It's not safe to access *neighbor yet; must verify states first. */ + bool neighbor_is_head = contents.metadata.is_head; + if (!edata_neighbor_head_state_mergeable(edata_is_head_get(edata), + neighbor_is_head, forward)) { + return NULL; + } + extent_state_t neighbor_state = contents.metadata.state; + if (pai == EXTENT_PAI_PAC) { + if (neighbor_state != expected_state) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + if (edata_committed_get(edata) != + edata_committed_get(neighbor)) { + /* + * Some platforms (e.g. 
Windows) require an explicit + * commit step (and writing to uncomitted memory is not + * allowed). + */ + return false; + } + } else { + if (neighbor_state == extent_state_active) { + return false; + } + /* From this point, it's safe to access *neighbor. */ } - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->edata mapping. We have to recheck while holding the lock. - */ - emap_lock_edata(tsdn, emap, edata1); - edata_t *edata2 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true).edata; - - if (edata1 == edata2) { - *result = edata1; - return emap_lock_result_success; + assert(edata_pai_get(edata) == pai); + if (edata_pai_get(neighbor) != pai) { + return false; + } + if (opt_retain) { + assert(edata_arena_ind_get(edata) == + edata_arena_ind_get(neighbor)); } else { - emap_unlock_edata(tsdn, emap, edata1); - return emap_lock_result_failure; + /* + * This isn't entirely safe with the presence of arena_reset / + * destroy, in which case the neighbor edata can be destoryed if + * it belongs to a manual arena. More on that later. + */ + if (edata_arena_ind_get(edata) != + edata_arena_ind_get(neighbor)) { + return false; + } } + + return true; } -/* - * Returns a pool-locked edata_t * if there's one associated with the given - * address, and NULL otherwise. - */ +/* Will be removed in the next commit. */ edata_t * -emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, - bool inactive_only) { +emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, + extent_state_t expected_state, bool allow_head_extent) { EMAP_DECLARE_RTREE_CTX; - edata_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)addr, false, false); if (elm == NULL) { return NULL; } - emap_lock_result_t lock_result; - do { - lock_result = emap_try_lock_rtree_leaf_elm(tsdn, emap, elm, - &ret, inactive_only); - } while (lock_result == emap_lock_result_failure); - return ret; + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true); + if (!allow_head_extent && contents.metadata.is_head) { + /* !allow_head_extent indicates the expanding path. */ + return NULL; + } + + edata_t *edata = contents.edata; + if (edata == NULL || contents.metadata.state != expected_state) { + return NULL; + } + assert(edata_state_get(edata) == expected_state); + emap_update_edata_state(tsdn, emap, edata, extent_state_updating); + + return edata; +} + +void +emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state) { + assert(emap_edata_in_transition(tsdn, emap, edata)); + assert(emap_edata_is_acquired(tsdn, emap, edata)); + + emap_update_edata_state(tsdn, emap, edata, new_state); +} + +edata_t * +emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); + assert(!edata_state_in_transition(expected_state)); + assert(expected_state == extent_state_dirty || + expected_state == extent_state_muzzy || + expected_state == extent_state_retained); + + void *neighbor_addr = forward ? edata_past_get(edata) : + edata_before_get(edata); + /* + * This is subtle; the rtree code asserts that its input pointer is + * non-NULL, and this is a useful thing to check. 
But it's possible + * that edata corresponds to an address of (void *)PAGE (in practice, + * this has only been observed on FreeBSD when address-space + * randomization is on, but it could in principle happen anywhere). In + * this case, edata_before_get(edata) is NULL, triggering the assert. + */ + if (neighbor_addr == NULL) { + return NULL; + } + + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)neighbor_addr, /* dependent*/ false, + /* init_missing */ false); + if (elm == NULL) { + return NULL; + } + + rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, + &emap->rtree, elm, /* dependent */ true); + if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, + expected_state, forward)) { + return NULL; + } + + /* From this point, the neighbor edata can be safely acquired. */ + edata_t *neighbor = neighbor_contents.edata; + emap_update_edata_state(tsdn, emap, neighbor, extent_state_merging); + extent_assert_can_coalesce(edata, neighbor); + + return neighbor; } static bool @@ -153,6 +248,7 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { + assert(edata_state_get(edata) == extent_state_active); EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; @@ -161,6 +257,10 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, if (err) { return true; } + assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, + /* dependent */ false).edata == NULL); + assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, + /* dependent */ false).edata == NULL); emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } @@ -190,6 +290,15 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + /* + * The edata must be either in an acquired state, or protected by state + * based locks. 
+ */ + if (!emap_edata_is_acquired(tsdn, emap, edata)) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } + EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; diff --git a/src/eset.c b/src/eset.c index a52a6f7..9183ac6 100644 --- a/src/eset.c +++ b/src/eset.c @@ -78,7 +78,8 @@ eset_insert(eset_t *eset, edata_t *edata) { void eset_remove(eset_t *eset, edata_t *edata) { - assert(edata_state_get(edata) == eset->state); + assert(edata_state_get(edata) == eset->state || + edata_state_in_transition(edata_state_get(edata))); size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); diff --git a/src/extent.c b/src/extent.c index 56ea33f..e660d4c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -64,12 +64,12 @@ extent_may_force_decay(pac_t *pac) { static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); - emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); + emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { return true; @@ -183,7 +183,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, not_reached(); case extent_state_dirty: case extent_state_muzzy: - emap_edata_state_update(tsdn, pac->emap, edata, + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); break; case extent_state_retained: @@ -230,7 +230,7 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); - emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); + emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); eset_insert(&ecache->eset, edata); } @@ -245,10 +245,11 @@ static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == ecache->state); + assert(edata_state_get(edata) == ecache->state || + edata_state_get(edata) == extent_state_updating); eset_remove(&ecache->eset, edata); - emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } static void @@ -290,20 +291,16 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { static bool extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { + assert(edata_state_get(edata) == extent_state_active); /* - * We need to hold the lock to protect against a concurrent coalesce - * operation that sees us in a partial state. + * No locking needed, as the edata must be in active state, which + * prevents other threads from accessing the edata. 
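+	 * (A racing thread that finds the edata through the rtree sees the active state and backs off instead of acquiring it.)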
*/ - emap_lock_edata(tsdn, pac->emap, edata); - if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, pac->emap, edata); return true; } - emap_unlock_edata(tsdn, pac->emap, edata); - if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); } @@ -333,9 +330,7 @@ extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { static void extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, pac->emap, edata); emap_deregister_boundary(tsdn, pac->emap, edata); - emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -383,22 +378,18 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, pac->emap, new_addr, - false); + edata = emap_try_acquire_edata(tsdn, pac->emap, new_addr, + ecache->state, /* allow_head_extent*/ false); if (edata != NULL) { - /* - * We might null-out edata to report an error, but we - * still need to unlock the associated mutex after. - */ - edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); - if (edata_arena_ind_get(edata) != ecache_ind_get(ecache) - || edata_size_get(edata) < size - || edata_state_get(edata) - != ecache->state) { + assert(edata_arena_ind_get(edata) == + ecache_ind_get(ecache)); + assert(edata_state_get(edata) == extent_state_updating); + if (edata_size_get(edata) < size) { + emap_release_edata(tsdn, pac->emap, edata, + ecache->state); edata = NULL; } - emap_unlock_edata(tsdn, pac->emap, unlock_edata); } } else { /* @@ -557,8 +548,8 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, pac, to_leak); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, pac->emap, - leak, false) == NULL); + assert(emap_try_acquire_edata(tsdn, pac->emap, + leak, ecache->state, true) == NULL); } return NULL; } @@ -807,41 +798,10 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } static bool -extent_can_coalesce(ecache_t *ecache, const edata_t *inner, - const edata_t *outer) { - assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); - - if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { - return false; - } - - /* - * We wouldn't really get into this situation because one or the other - * edata would have to have a head bit set to true, but this is - * conceptually correct and cheap. - */ - if (edata_pai_get(inner) != edata_pai_get(outer)) { - return false; - } - - assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != ecache->state) { - return false; - } - - if (edata_committed_get(inner) != edata_committed_get(outer)) { - return false; - } - - return true; -} - -static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(ecache, inner, outer)); - - extent_activate_locked(tsdn, pac, ecache, outer); + extent_assert_can_coalesce(inner, outer); + eset_remove(&ecache->eset, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, @@ -873,22 +833,11 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, again = false; /* Try to coalesce forward. 
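That is, with the neighbor that begins at edata_past_get(edata), i.e. the next higher address.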
*/ - edata_t *next = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_past_get(edata), inactive_only); + edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, + edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); if (next != NULL) { - /* - * ecache->mtx only protects against races for - * like-state extents, so call extent_can_coalesce() - * before releasing next's pool lock. - */ - bool can_coalesce = extent_can_coalesce(ecache, - edata, next); - - emap_unlock_edata(tsdn, pac->emap, next); - - if (can_coalesce && !extent_coalesce(tsdn, pac, - ehooks, ecache, edata, next, true, - growing_retained)) { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, + next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -899,30 +848,11 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } /* Try to coalesce backward. */ - edata_t *prev = NULL; - if (edata_before_get(edata) != NULL) { - /* - * This is subtle; the rtree code asserts that its input - * pointer is non-NULL, and this is a useful thing to - * check. But it's possible that edata corresponds to - * an address of (void *)PAGE (in practice, this has - * only been observed on FreeBSD when address-space - * randomization is on, but it could in principle happen - * anywhere). In this case, edata_before_get(edata) is - * NULL, triggering the assert. - */ - prev = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_before_get(edata), inactive_only); - - } + edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, + edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(ecache, edata, - prev); - emap_unlock_edata(tsdn, pac->emap, prev); - - if (can_coalesce && !extent_coalesce(tsdn, pac, - ehooks, ecache, edata, prev, false, - growing_retained)) { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, + prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -1218,24 +1148,27 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_error_b; } - emap_lock_edata2(tsdn, pac->emap, edata, trail); + /* + * No need to acquire trail or edata, because: 1) trail was new (just + * allocated); and 2) edata is either an active allocation (the shrink + * path), or in an acquired state (extracted from the ecache on the + * extent_recycle_split path). 
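+	 * In both cases the rtree-tracked state keeps other threads from operating on them while the split is in progress.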
+ */ + assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); + assert(emap_edata_is_acquired(tsdn, pac->emap, trail)); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); if (err) { - goto label_error_c; + goto label_error_b; } edata_size_set(edata, size_a); emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, pac->emap, edata, trail); - return trail; -label_error_c: - emap_unlock_edata2(tsdn, pac->emap, edata, trail); label_error_b: edata_cache_put(tsdn, pac->edata_cache, trail); label_error_a: @@ -1277,15 +1210,15 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_prepare_t prepare; emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - emap_lock_edata2(tsdn, pac->emap, a, b); - + assert(edata_state_get(a) == extent_state_active || + edata_state_get(a) == extent_state_merging); + edata_state_set(a, extent_state_active); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); emap_merge_commit(tsdn, pac->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, pac->emap, a, b); edata_cache_put(tsdn, pac->edata_cache, b); diff --git a/src/hpa_central.c b/src/hpa_central.c index 36758a0..9e00dd6 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -33,15 +33,16 @@ hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, size, trail, cursize - size); + assert(edata_state_get(edata) == edata_state_get(trail)); if (err) { edata_cache_small_put(tsdn, ¢ral->ecs, trail); return NULL; } - emap_lock_edata2(tsdn, central->emap, edata, trail); + assert(edata_state_get(edata) == edata_state_get(trail)); + edata_size_set(edata, size); emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, cursize - size); - emap_unlock_edata2(tsdn, central->emap, edata, trail); return trail; } @@ -91,7 +92,7 @@ label_success: */ assert(edata_state_get(edata) == extent_state_dirty); assert(edata_base_get(edata) == edata_addr_get(edata)); - emap_edata_state_update(tsdn, central->emap, edata, + emap_update_edata_state(tsdn, central->emap, edata, extent_state_active); return edata; } @@ -137,43 +138,22 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, edata_sn_set(edata, sn); edata_sn_set(trail, sn); - emap_edata_state_update(tsdn, central->emap, trail, extent_state_dirty); + emap_update_edata_state(tsdn, central->emap, trail, extent_state_dirty); eset_insert(¢ral->eset, trail); return false; } -static edata_t * -hpa_central_dalloc_get_merge_candidate(tsdn_t *tsdn, hpa_central_t *central, - void *addr) { - edata_t *edata = emap_lock_edata_from_addr(tsdn, central->emap, addr, - /* inactive_only */ true); - if (edata == NULL) { - return NULL; - } - extent_pai_t pai = edata_pai_get(edata); - extent_state_t state = edata_state_get(edata); - emap_unlock_edata(tsdn, central->emap, edata); - - if (pai != EXTENT_PAI_HPA) { - return NULL; - } - if (state == extent_state_active) { - return NULL; - } - - return edata; -} - /* Merges b into a, freeing b back to the edata cache.. 
*/ static void hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, edata_t *b) { + assert(emap_edata_is_acquired(tsdn, central->emap, a)); + assert(emap_edata_is_acquired(tsdn, central->emap, b)); + emap_prepare_t prepare; emap_merge_prepare(tsdn, central->emap, &prepare, a, b); - emap_lock_edata2(tsdn, central->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); emap_merge_commit(tsdn, central->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, central->emap, a, b); edata_cache_small_put(tsdn, ¢ral->ecs, b); } @@ -189,21 +169,24 @@ hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - if (!edata_is_head_get(edata)) { - edata_t *lead = hpa_central_dalloc_get_merge_candidate(tsdn, - central, edata_before_get(edata)); - if (lead != NULL) { - eset_remove(¢ral->eset, lead); - hpa_central_dalloc_merge(tsdn, central, lead, edata); - edata = lead; - } - } - edata_t *trail = hpa_central_dalloc_get_merge_candidate(tsdn, central, - edata_past_get(edata)); - if (trail != NULL && !edata_is_head_get(trail)) { + /* + * Merge forward first, so that the original *edata stays active state + * for the second acquire (only necessary for sanity checking). + */ + edata_t *trail = emap_try_acquire_edata_neighbor(tsdn, central->emap, + edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ true); + if (trail != NULL) { eset_remove(¢ral->eset, trail); hpa_central_dalloc_merge(tsdn, central, edata, trail); } - emap_edata_state_update(tsdn, central->emap, edata, extent_state_dirty); + edata_t *lead = emap_try_acquire_edata_neighbor(tsdn, central->emap, + edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ false); + if (lead != NULL) { + eset_remove(¢ral->eset, lead); + hpa_central_dalloc_merge(tsdn, central, lead, edata); + edata = lead; + } + + emap_update_edata_state(tsdn, central->emap, edata, extent_state_dirty); eset_insert(¢ral->eset, edata); } -- cgit v0.12 From 49b7d7f0a4731e060df095075bedf6391058a0cd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 00:21:47 -0800 Subject: Passing down the original edata on the expand path. Instead of passing down the new_addr, pass down the active edata which allows us to always use a neighbor-acquiring semantic. In other words, this tells us both the original edata and neighbor address. With this change, only neighbors of a "known" edata can be acquired, i.e. acquiring an edata based on an arbitrary address isn't possible anymore. --- include/jemalloc/internal/edata.h | 1 - include/jemalloc/internal/emap.h | 30 ++++++++++++- include/jemalloc/internal/extent.h | 6 ++- src/emap.c | 84 +++++++++++++++++------------------- src/extent.c | 88 ++++++++++++++++++-------------------- src/pac.c | 8 ++-- 6 files changed, 116 insertions(+), 101 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index b2e6ee9..55d1dfe 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -25,7 +25,6 @@ enum extent_state_e { extent_state_muzzy = 2, extent_state_retained = 3, extent_state_transition = 4, /* States below are intermediate. */ - extent_state_updating = 4, extent_state_merging = 5, extent_state_max = 5 /* Sanity checking only. 
*/ }; diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 239f3e4..5a5dbb6 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -43,11 +43,26 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state); -edata_t *emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, - extent_state_t expected_state, bool allow_head_extent); +/* + * The two acquire functions below allow accessing neighbor edatas, if it's safe + * and valid to do so (i.e. from the same arena, of the same state, etc.). This + * is necessary because the ecache locks are state based, and only protect + * edatas with the same state. Therefore the neighbor edata's state needs to be + * verified first, before chasing the edata pointer. The returned edata will be + * in an acquired state, meaning other threads will be prevented from accessing + * it, even if technically the edata can still be discovered from the rtree. + * + * This means, at any moment when holding pointers to edata, either one of the + * state based locks is held (and the edatas are all of the protected state), or + * the edatas are in an acquired state (e.g. in active or merging state). The + * acquire operation itself (changing the edata to an acquired state) is done + * under the state locks. + */ edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward); +edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state); void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state); @@ -196,6 +211,17 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_committed_get(inner) == edata_committed_get(outer)); assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); + assert(edata_base_get(inner) == edata_past_get(outer) || + edata_base_get(outer) == edata_past_get(inner)); +} + +JEMALLOC_ALWAYS_INLINE void +extent_assert_can_expand(const edata_t *original, const edata_t *expand) { + assert(edata_arena_ind_get(original) == edata_arena_ind_get(expand)); + assert(edata_pai_get(original) == edata_pai_get(expand)); + assert(edata_state_get(original) == extent_state_active); + assert(edata_state_get(expand) == extent_state_merging); + assert(edata_past_get(original) == edata_base_get(expand)); } JEMALLOC_ALWAYS_INLINE edata_t * diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f2fee5c..6a17ba6 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,9 +20,11 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool zero); void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t 
*ehooks, diff --git a/src/emap.c b/src/emap.c index 26a079c..949b53e 100644 --- a/src/emap.c +++ b/src/emap.c @@ -70,7 +70,8 @@ edata_neighbor_head_state_mergeable(bool edata_is_head, static inline bool edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, - extent_pai_t pai, extent_state_t expected_state, bool forward) { + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { edata_t *neighbor = contents.edata; if (neighbor == NULL) { return false; @@ -87,8 +88,8 @@ edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } /* From this point, it's safe to access *neighbor. */ - if (edata_committed_get(edata) != - edata_committed_get(neighbor)) { + if (!expanding && (edata_committed_get(edata) != + edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit * commit step (and writing to uncomitted memory is not @@ -125,47 +126,13 @@ edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return true; } -/* Will be removed in the next commit. */ -edata_t * -emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, - extent_state_t expected_state, bool allow_head_extent) { - EMAP_DECLARE_RTREE_CTX; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true); - if (!allow_head_extent && contents.metadata.is_head) { - /* !allow_head_extent indicates the expanding path. */ - return NULL; - } - - edata_t *edata = contents.edata; - if (edata == NULL || contents.metadata.state != expected_state) { - return NULL; - } - assert(edata_state_get(edata) == expected_state); - emap_update_edata_state(tsdn, emap, edata, extent_state_updating); - - return edata; -} - -void -emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state) { - assert(emap_edata_in_transition(tsdn, emap, edata)); - assert(emap_edata_is_acquired(tsdn, emap, edata)); - - emap_update_edata_state(tsdn, emap, edata, new_state); -} - -edata_t * -emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_pai_t pai, extent_state_t expected_state, bool forward) { +static inline edata_t * +emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + assert(!expanding || forward); assert(!edata_state_in_transition(expected_state)); assert(expected_state == extent_state_dirty || expected_state == extent_state_muzzy || @@ -196,18 +163,47 @@ emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ true); if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, - expected_state, forward)) { + expected_state, forward, expanding)) { return NULL; } /* From this point, the neighbor edata can be safely acquired. 
*/ edata_t *neighbor = neighbor_contents.edata; + assert(edata_state_get(neighbor) == expected_state); emap_update_edata_state(tsdn, emap, neighbor, extent_state_merging); - extent_assert_can_coalesce(edata, neighbor); + if (expanding) { + extent_assert_can_expand(edata, neighbor); + } else { + extent_assert_can_coalesce(edata, neighbor); + } return neighbor; } +edata_t * +emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai, + expected_state, forward, /* expand */ false); +} + +edata_t * +emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state) { + /* Try expanding forward. */ + return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai, + expected_state, /* forward */ true, /* expand */ true); +} + +void +emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state) { + assert(emap_edata_in_transition(tsdn, emap, edata)); + assert(emap_edata_is_acquired(tsdn, emap, edata)); + + emap_update_edata_state(tsdn, emap, edata, new_state); +} + static bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, diff --git a/src/extent.c b/src/extent.c index e660d4c..6d9e002 100644 --- a/src/extent.c +++ b/src/extent.c @@ -36,15 +36,15 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool zero, - bool *commit, bool growing_retained); + ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, + bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, - bool *commit); + ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, + bool zero, bool *commit); static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -80,14 +80,14 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t * ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, + edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; @@ -95,25 +95,27 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t 
size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, size, alignment, zero, &commit); if (edata == NULL) { - if (opt_retain && new_addr != NULL) { + if (opt_retain && expand_edata != NULL) { /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper which does mmap that is - * very unlikely to succeed (unless it happens to be at - * the end). + * When retain is enabled and trying to expand, we do + * not attempt extent_alloc_wrapper which does mmap that + * is very unlikely to succeed (unless it happens to be + * at the end). */ return NULL; } + void *new_addr = (expand_edata == NULL) ? NULL : + edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); } @@ -246,7 +248,7 @@ extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state || - edata_state_get(edata) == extent_state_updating); + edata_state_get(edata) == extent_state_merging); eset_remove(&ecache->eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); @@ -354,37 +356,30 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); - if (config_debug && new_addr != NULL) { + if (config_debug && expand_edata != NULL) { /* - * Non-NULL new_addr has two use cases: - * - * 1) Recycle a known-extant extent, e.g. during purging. - * 2) Perform in-place expanding reallocation. - * - * Regardless of use case, new_addr must either refer to a - * non-existing extent, or to the base of an extant extent, - * since only active slabs support interior lookups (which of - * course cannot be recycled). + * Non-NULL expand_edata indicates in-place expanding realloc. + * new_addr must either refer to a non-existing extent, or to + * the base of an extant extent, since only active slabs support + * interior lookups (which of course cannot be recycled). 
*/ + void *new_addr = edata_past_get(expand_edata); assert(PAGE_ADDR2BASE(new_addr) == new_addr); assert(alignment <= PAGE); } malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; - if (new_addr != NULL) { - edata = emap_try_acquire_edata(tsdn, pac->emap, new_addr, - ecache->state, /* allow_head_extent*/ false); + if (expand_edata != NULL) { + edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, + expand_edata, EXTENT_PAI_PAC, ecache->state); if (edata != NULL) { - assert(edata_base_get(edata) == new_addr); - assert(edata_arena_ind_get(edata) == - ecache_ind_get(ecache)); - assert(edata_state_get(edata) == extent_state_updating); + extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { emap_release_edata(tsdn, pac->emap, edata, ecache->state); @@ -454,10 +449,11 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t alignment, bool growing_retained) { + edata_t *expand_edata, size_t size, size_t alignment, + bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); - assert(new_addr == NULL || leadsize == 0); + assert(expand_edata == NULL || leadsize == 0); if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; } @@ -504,7 +500,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; @@ -513,7 +509,7 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, alignment, growing_retained); + expand_edata, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -544,12 +540,9 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_deregister(tsdn, pac, to_salvage); } if (to_leak != NULL) { - void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, pac, to_leak); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_try_acquire_edata(tsdn, pac->emap, - leak, ecache->state, true) == NULL); } return NULL; } @@ -562,17 +555,17 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, - bool growing_retained) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - new_addr, size, alignment, growing_retained); + expand_edata, size, alignment, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, pac, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; @@ -742,21 +735,22 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &pac->grow_mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, - &pac->ecache_retained, new_addr, size, alignment, zero, - commit, /* growing_retained */ true); + &pac->ecache_retained, expand_edata, size, alignment, zero, commit, + /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } - } else if (opt_retain && new_addr == NULL) { + } else if (opt_retain && expand_edata == NULL) { edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. */ diff --git a/src/pac.c b/src/pac.c index 93427ca..0737e68 100644 --- a/src/pac.c +++ b/src/pac.c @@ -133,9 +133,7 @@ static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { pac_t *pac = (pac_t *)self; - ehooks_t *ehooks = pac_ehooks_get(pac); - void *trail_begin = edata_past_get(edata); size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -144,14 +142,14 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - trail_begin, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero); if (trail == NULL) { trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - trail_begin, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, trail_begin, expand_amount, PAGE, + &pac->ecache_retained, edata, expand_amount, PAGE, zero); mapped_add = expand_amount; } -- cgit v0.12 From add636596afecb87e220d31ae75a9ba0b4601fbc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 23:41:51 -0800 Subject: Stop checking head state in the merge hook. Now that all merging goes through try_acquire_edata_neighbor, the mergeability checks (including head state checking) are done before reaching the merge hook. In other words, the merge hook will never be called if the head state doesn't agree.
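For reference, the head-state rule this message relies on is the one introduced as edata_neighbor_head_state_mergeable() in the diff below; collapsed into a minimal sketch (same logic, just folded into single returns; the _sketch name is illustrative only), it reads:

    /*
     * Refuse the merge whenever the higher-address extent is a head
     * extent, in either merge direction.
     */
    static bool
    neighbor_head_state_mergeable_sketch(bool edata_is_head,
        bool neighbor_is_head, bool forward) {
    	if (forward) {
    		/* Forward merge: the neighbor sits at the higher address. */
    		return !neighbor_is_head;
    	}
    	/* Backward merge: this edata sits at the higher address. */
    	return !edata_is_head;
    }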
--- include/jemalloc/internal/ehooks.h | 20 ++++---------------- include/jemalloc/internal/emap.h | 20 ++++++++++++++++++++ src/ehooks.c | 34 ++++++++++++++++------------------ src/emap.c | 20 -------------------- src/extent.c | 4 ++-- 5 files changed, 42 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index bae468b..064ecf5 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -61,8 +61,7 @@ bool ehooks_default_split_impl(); bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, - void *addr_b, bool head_b); +bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b); void ehooks_default_zero_impl(void *addr, size_t size); /* @@ -338,21 +337,10 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { + void *addr_b, size_t size_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - /* - * The definition of extent_hooks merge function doesn't know about - * extent head state, but the implementation does. As a result, it - * needs to call iealloc again and walk the rtree. Since the cost of an - * iealloc is large relative to the cost of the default merge hook - * (which on posix-likes is just "return false"), we go even further - * when we short-circuit; we don't just check if the extent hooks - * generally are default, we check if the merge hook specifically is. - */ - if (extent_hooks == &ehooks_default_extent_hooks - || extent_hooks->merge == &ehooks_default_merge) { - return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, - head_b); + if (extent_hooks == &ehooks_default_extent_hooks) { + return ehooks_default_merge_impl(tsdn, addr_a, addr_b); } else if (extent_hooks->merge == NULL) { return true; } else { diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5a5dbb6..364aefa 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -224,6 +224,26 @@ extent_assert_can_expand(const edata_t *original, const edata_t *expand) { assert(edata_past_get(original) == edata_base_get(expand)); } +JEMALLOC_ALWAYS_INLINE bool +edata_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. 
+ */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { EMAP_DECLARE_RTREE_CTX; diff --git a/src/ehooks.c b/src/ehooks.c index e1815ee..ca3ca20 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -188,11 +188,10 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, } bool -ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, - bool head_b) { +ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) { assert(addr_a < addr_b); /* - * For non-DSS cases (first 2 branches) -- + * For non-DSS cases -- * a) W/o maps_coalesce, merge is not always allowed (Windows): * 1) w/o retain, never merge (first branch below). * 2) with retain, only merge extents from the same VirtualAlloc @@ -204,17 +203,23 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, * disallowed if b is a head extent, i.e. no merging across * different mmap regions. * - * a2) and b2) share the implementation (the no_merge_heads branch). + * a2) and b2) are implemented in emap_try_acquire_edata_neighbor, and + * sanity checked in the second branch below. */ if (!maps_coalesce && !opt_retain) { return true; } - /* - * Don't merge across mappings when retain is on -- this preserves - * first-fit ordering. - */ - if (opt_retain && head_b) { - return true; + if (config_debug) { + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, + addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, + addr_b); + bool head_b = edata_is_head_get(b); + emap_assert_mapped(tsdn, &arena_emap_global, a); + emap_assert_mapped(tsdn, &arena_emap_global, b); + assert(edata_neighbor_head_state_mergeable(head_a, head_b, + /* forward */ true)); } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { return true; @@ -228,14 +233,7 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); - bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); - bool head_b = edata_is_head_get(b); - emap_assert_mapped(tsdn, &arena_emap_global, a); - emap_assert_mapped(tsdn, &arena_emap_global, b); - - return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); + return ehooks_default_merge_impl(tsdn, addr_a, addr_b); } void diff --git a/src/emap.c b/src/emap.c index 949b53e..0fe230a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -49,26 +49,6 @@ emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, } static inline bool -edata_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { - /* - * Head states checking: disallow merging if the higher addr extent is a - * head extent. This helps preserve first-fit, and more importantly - * makes sure no merge across arenas. 
- */ - if (forward) { - if (neighbor_is_head) { - return false; - } - } else { - if (edata_is_head) { - return false; - } - } - return true; -} - -static inline bool edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, extent_pai_t pai, extent_state_t expected_state, bool forward, bool expanding) { diff --git a/src/extent.c b/src/extent.c index 6d9e002..1748d98 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1189,8 +1189,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_assert_mapped(tsdn, pac->emap, b); bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), - edata_size_get(a), edata_is_head_get(a), edata_base_get(b), - edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); + edata_size_get(a), edata_base_get(b), edata_size_get(b), + edata_committed_get(a)); if (err) { return true; -- cgit v0.12 From 7c964b03524de23eeff7fe203c764c7a0c0977ac Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 17 Mar 2021 16:35:57 -0700 Subject: Add rtree_write_range(): writing the same content to multiple leaf elements. Apply to emap_(de)register_interior which became noticeable in perf profiles. --- include/jemalloc/internal/rtree.h | 111 +++++++++++++++++++++++++++++++------- src/emap.c | 35 ++++++++---- src/pa.c | 2 +- test/unit/rtree.c | 66 ++++++++++++++++++++++- 4 files changed, 182 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 42aa11c..c5f0d8c 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -137,23 +137,24 @@ bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed); rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leafkey(uintptr_t key) { +JEMALLOC_ALWAYS_INLINE unsigned +rtree_leaf_maskbits(void) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return ptrbits - cumbits; +} + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(uintptr_t key) { + uintptr_t mask = ~((ZU(1) << rtree_leaf_maskbits()) - 1); return (key & mask); } JEMALLOC_ALWAYS_INLINE size_t rtree_cache_direct_map(uintptr_t key) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); + return (size_t)((key >> rtree_leaf_maskbits()) & + (RTREE_CTX_NCACHE - 1)); } JEMALLOC_ALWAYS_INLINE uintptr_t @@ -265,30 +266,49 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -static inline void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_contents_t contents) { - assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); +JEMALLOC_ALWAYS_INLINE void +rtree_contents_encode(rtree_contents_t contents, void **bits, + unsigned *additional) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); + *bits = (void *)rtree_leaf_elm_bits_encode(contents); #else - unsigned metadata_bits = (unsigned)contents.metadata.slab + *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 
1) | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH)); - atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); + *bits = contents.edata; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, void *bits, unsigned additional) { +#ifdef RTREE_LEAF_COMPACT + atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE); +#else + atomic_store_u(&elm->le_metadata, additional, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid * as soon as the edata field is non-NULL. */ - atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE); + atomic_store_p(&elm->le_edata, bits, ATOMIC_RELEASE); #endif } +JEMALLOC_ALWAYS_INLINE void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, rtree_contents_t contents) { + assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); + void *bits; + unsigned additional; + + rtree_contents_encode(contents, &bits, &additional); + rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); +} + /* The state field can be updated independently (and more frequently). */ -static inline void +JEMALLOC_ALWAYS_INLINE void rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) { assert(elm1 != NULL); @@ -447,7 +467,45 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct return false; } -static inline bool +JEMALLOC_ALWAYS_INLINE void +rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end, rtree_contents_t contents, bool clearing) { + assert((base & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0); + /* + * Only used for emap_(de)register_interior, which implies the + * boundaries have been registered already. Therefore all the lookups + * are dependent w/o init_missing, assuming the range spans across at + * most 2 rtree leaf nodes (each covers 1 GiB of vaddr). + */ + void *bits; + unsigned additional; + rtree_contents_encode(contents, &bits, &additional); + + rtree_leaf_elm_t *elm = NULL; /* Dead store. 
*/ + for (uintptr_t addr = base; addr <= end; addr += PAGE) { + if (addr == base || + (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + } + assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false)); + assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).edata != NULL); + rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); + elm++; + } +} + +JEMALLOC_ALWAYS_INLINE void +rtree_write_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end, rtree_contents_t contents) { + rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents, + /* clearing */ false); +} + +JEMALLOC_ALWAYS_INLINE bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_contents_t contents) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, @@ -478,4 +536,17 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_write(tsdn, rtree, elm, contents); } +static inline void +rtree_clear_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end) { + rtree_contents_t contents; + contents.edata = NULL; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; + contents.metadata.is_head = false; + contents.metadata.state = (extent_state_t)0; + rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents, + /* clearing */ true); +} + #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/src/emap.c b/src/emap.c index 0fe230a..a1f402b 100644 --- a/src/emap.c +++ b/src/emap.c @@ -241,6 +241,7 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, return false; } +/* Invoked *after* emap_register_boundary. */ void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind) { @@ -249,6 +250,22 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, assert(edata_slab_get(edata)); assert(edata_state_get(edata) == extent_state_active); + if (config_debug) { + /* Making sure the boundary is registered already. */ + rtree_leaf_elm_t *elm_a, *elm_b; + bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, + edata, /* dependent */ true, /* init_missing */ false, + &elm_a, &elm_b); + assert(!err); + rtree_contents_t contents_a, contents_b; + contents_a = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, + /* dependent */ true); + contents_b = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, + /* dependent */ true); + assert(contents_a.edata == edata && contents_b.edata == edata); + assert(contents_a.metadata.slab && contents_b.metadata.slab); + } + rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; @@ -256,12 +273,10 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, contents.metadata.state = extent_state_active; contents.metadata.is_head = false; /* Not allowed to access. */ - /* Register interior. 
*/ - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), contents); - } + assert(edata_size_get(edata) > (2 << LG_PAGE)); + rtree_write_range(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + PAGE, + (uintptr_t)edata_last_get(edata) - PAGE, contents); } void @@ -289,10 +304,10 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE)); + if (edata_size_get(edata) > (2 << LG_PAGE)) { + rtree_clear_range(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + PAGE, + (uintptr_t)edata_last_get(edata) - PAGE); } } diff --git a/src/pa.c b/src/pa.c index dd61aaa..90809b3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,7 +120,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); edata_slab_set(edata, slab); - if (slab) { + if (slab && (size > 2 * PAGE)) { emap_register_interior(tsdn, shard->emap, edata, szind); } } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 9251652..7b2a4e3 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -210,11 +210,75 @@ TEST_BEGIN(test_rtree_random) { } TEST_END +static void +test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, + uintptr_t end) { + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); + + edata_t *edata_e = alloc_edata(); + edata_init(edata_e, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + rtree_contents_t contents; + contents.edata = edata_e; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; + contents.metadata.is_head = false; + contents.metadata.state = extent_state_active; + + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, + contents), "Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, + contents), "Unexpected rtree_write() failure"); + + rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents); + for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + start + (i << LG_PAGE)).edata, edata_e, + "rtree_edata_read() should return previously set value"); + } + rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end); + rtree_leaf_elm_t *elm; + for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, + start + (i << LG_PAGE), false, false); + expect_ptr_not_null(elm, "Should have been initialized."); + expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm, + false).edata, "Should have been cleared."); + } +} + +TEST_BEGIN(test_rtree_range) { + tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + expect_ptr_not_null(base, "Unexpected base_new failure"); + + rtree_t *rtree = &test_rtree; + expect_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); + + /* Not crossing rtree node boundary first. */ + uintptr_t start = ZU(1) << rtree_leaf_maskbits(); + uintptr_t end = start + (ZU(100) << LG_PAGE); + test_rtree_range_write(tsdn, rtree, start, end); + + /* Crossing rtree node boundary. 
*/ + start = (ZU(1) << rtree_leaf_maskbits()) - (ZU(10) << LG_PAGE); + end = start + (ZU(100) << LG_PAGE); + assert_ptr_ne((void *)rtree_leafkey(start), (void *)rtree_leafkey(end), + "The range should span across two rtree nodes"); + test_rtree_range_write(tsdn, rtree, start, end); + + base_delete(tsdn, base); +} +TEST_END + int main(void) { return test( test_rtree_read_empty, test_rtree_extrema, test_rtree_bits, - test_rtree_random); + test_rtree_random, + test_rtree_range); } -- cgit v0.12 From 3093d9455eb179d75ec8a17b1073ee605fb1f0a9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Mar 2021 15:32:44 -0700 Subject: Move the edata mergeability related functions to extent.h. --- include/jemalloc/internal/emap.h | 20 ----------- include/jemalloc/internal/extent.h | 73 ++++++++++++++++++++++++++++++++++++++ src/ehooks.c | 2 +- src/emap.c | 60 +------------------------------ 4 files changed, 75 insertions(+), 80 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 364aefa..5a5dbb6 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -224,26 +224,6 @@ extent_assert_can_expand(const edata_t *original, const edata_t *expand) { assert(edata_past_get(original) == edata_base_get(expand)); } -JEMALLOC_ALWAYS_INLINE bool -edata_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { - /* - * Head states checking: disallow merging if the higher addr extent is a - * head extent. This helps preserve first-fit, and more importantly - * makes sure no merge across arenas. - */ - if (forward) { - if (neighbor_is_head) { - return false; - } - } else { - if (edata_is_head) { - return false; - } - } - return true; -} - JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { EMAP_DECLARE_RTREE_CTX; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 6a17ba6..b39e5ed 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -51,4 +51,77 @@ bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t extent_sn_next(pac_t *pac); bool extent_boot(void); +JEMALLOC_ALWAYS_INLINE bool +extent_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. + */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; +} + +JEMALLOC_ALWAYS_INLINE bool +extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { + edata_t *neighbor = contents.edata; + if (neighbor == NULL) { + return false; + } + /* It's not safe to access *neighbor yet; must verify states first. */ + bool neighbor_is_head = contents.metadata.is_head; + if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), + neighbor_is_head, forward)) { + return NULL; + } + extent_state_t neighbor_state = contents.metadata.state; + if (pai == EXTENT_PAI_PAC) { + if (neighbor_state != expected_state) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + if (!expanding && (edata_committed_get(edata) != + edata_committed_get(neighbor))) { + /* + * Some platforms (e.g. 
Windows) require an explicit + * commit step (and writing to uncomitted memory is not + * allowed). + */ + return false; + } + } else { + if (neighbor_state == extent_state_active) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + } + + assert(edata_pai_get(edata) == pai); + if (edata_pai_get(neighbor) != pai) { + return false; + } + if (opt_retain) { + assert(edata_arena_ind_get(edata) == + edata_arena_ind_get(neighbor)); + } else { + if (edata_arena_ind_get(edata) != + edata_arena_ind_get(neighbor)) { + return false; + } + } + + return true; +} + #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/src/ehooks.c b/src/ehooks.c index ca3ca20..535066e 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -218,7 +218,7 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) { bool head_b = edata_is_head_get(b); emap_assert_mapped(tsdn, &arena_emap_global, a); emap_assert_mapped(tsdn, &arena_emap_global, b); - assert(edata_neighbor_head_state_mergeable(head_a, head_b, + assert(extent_neighbor_head_state_mergeable(head_a, head_b, /* forward */ true)); } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { diff --git a/src/emap.c b/src/emap.c index a1f402b..1cc4fc8 100644 --- a/src/emap.c +++ b/src/emap.c @@ -48,64 +48,6 @@ emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, emap_assert_mapped(tsdn, emap, edata); } -static inline bool -edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, - extent_pai_t pai, extent_state_t expected_state, bool forward, - bool expanding) { - edata_t *neighbor = contents.edata; - if (neighbor == NULL) { - return false; - } - /* It's not safe to access *neighbor yet; must verify states first. */ - bool neighbor_is_head = contents.metadata.is_head; - if (!edata_neighbor_head_state_mergeable(edata_is_head_get(edata), - neighbor_is_head, forward)) { - return NULL; - } - extent_state_t neighbor_state = contents.metadata.state; - if (pai == EXTENT_PAI_PAC) { - if (neighbor_state != expected_state) { - return false; - } - /* From this point, it's safe to access *neighbor. */ - if (!expanding && (edata_committed_get(edata) != - edata_committed_get(neighbor))) { - /* - * Some platforms (e.g. Windows) require an explicit - * commit step (and writing to uncomitted memory is not - * allowed). - */ - return false; - } - } else { - if (neighbor_state == extent_state_active) { - return false; - } - /* From this point, it's safe to access *neighbor. */ - } - - assert(edata_pai_get(edata) == pai); - if (edata_pai_get(neighbor) != pai) { - return false; - } - if (opt_retain) { - assert(edata_arena_ind_get(edata) == - edata_arena_ind_get(neighbor)); - } else { - /* - * This isn't entirely safe with the presence of arena_reset / - * destroy, in which case the neighbor edata can be destoryed if - * it belongs to a manual arena. More on that later. 
- */ - if (edata_arena_ind_get(edata) != - edata_arena_ind_get(neighbor)) { - return false; - } - } - - return true; -} - static inline edata_t * emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward, @@ -142,7 +84,7 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ true); - if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, + if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, expected_state, forward, expanding)) { return NULL; } -- cgit v0.12 From 03d95cba8868f99fa18683d1e82596467ed08c7e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 00:23:46 -0700 Subject: Remove the unnecessary arena_ind_set in base_alloc_edata(). All edata alloc sites are already followed with proper edata_init(). --- src/base.c | 1 - src/hpa.c | 1 - 2 files changed, 2 deletions(-) diff --git a/src/base.c b/src/base.c index 9d4ce5c..44878ad 100644 --- a/src/base.c +++ b/src/base.c @@ -476,7 +476,6 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { if (edata == NULL) { return NULL; } - edata_arena_ind_set(edata, ehooks_ind_get(&base->ehooks)); edata_esn_set(edata, esn); return edata; } diff --git a/src/hpa.c b/src/hpa.c index 7d4fa1b..a234e6c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -454,7 +454,6 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, *oom = true; return NULL; } - assert(edata_arena_ind_get(edata) == shard->ind); hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { -- cgit v0.12 From 7dc77527ba1fa8a2764b975e9955a55cbb46d034 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Mar 2021 17:44:18 -0700 Subject: Delete the mutex_pool module. --- Makefile.in | 1 - include/jemalloc/internal/emap.h | 3 - include/jemalloc/internal/mutex_pool.h | 94 ---------------------- include/jemalloc/internal/witness.h | 1 - msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/emap.c | 12 +-- src/mutex_pool.c | 17 ---- 10 files changed, 1 insertion(+), 135 deletions(-) delete mode 100644 include/jemalloc/internal/mutex_pool.h delete mode 100644 src/mutex_pool.c diff --git a/Makefile.in b/Makefile.in index 11a553b..c00ad0f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -128,7 +128,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ - $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5a5dbb6..a40b504 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EMAP_H #include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" /* @@ -17,8 +16,6 @@ typedef struct emap_s emap_t; struct emap_s { rtree_t rtree; - /* Keyed by the address of the edata_t being protected. */ - mutex_pool_t mtx_pool; }; /* Used to pass rtree lookup context down the path. 
*/ diff --git a/include/jemalloc/internal/mutex_pool.h b/include/jemalloc/internal/mutex_pool.h deleted file mode 100644 index 726cece..0000000 --- a/include/jemalloc/internal/mutex_pool.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H -#define JEMALLOC_INTERNAL_MUTEX_POOL_H - -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/witness.h" - -/* We do mod reductions by this value, so it should be kept a power of 2. */ -#define MUTEX_POOL_SIZE 256 - -typedef struct mutex_pool_s mutex_pool_t; -struct mutex_pool_s { - malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; -}; - -bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); - -/* Internal helper - not meant to be called outside this module. */ -static inline malloc_mutex_t * -mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { - size_t hash_result[2]; - hash(&key, sizeof(key), 0xd50dcc1b, hash_result); - return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE]; -} - -static inline void -mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) { - for (int i = 0; i < MUTEX_POOL_SIZE; i++) { - malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]); - } -} - -/* - * Note that a mutex pool doesn't work exactly the way an embdedded mutex would. - * You're not allowed to acquire mutexes in the pool one at a time. You have to - * acquire all the mutexes you'll need in a single function call, and then - * release them all in a single function call. - */ - -static inline void -mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - mutex_pool_assert_not_held(tsdn, pool); - - malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); - malloc_mutex_lock(tsdn, mutex); -} - -static inline void -mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); - malloc_mutex_unlock(tsdn, mutex); - - mutex_pool_assert_not_held(tsdn, pool); -} - -static inline void -mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, - uintptr_t key2) { - mutex_pool_assert_not_held(tsdn, pool); - - malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); - malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); - if ((uintptr_t)mutex1 < (uintptr_t)mutex2) { - malloc_mutex_lock(tsdn, mutex1); - malloc_mutex_lock(tsdn, mutex2); - } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) { - malloc_mutex_lock(tsdn, mutex1); - } else { - malloc_mutex_lock(tsdn, mutex2); - malloc_mutex_lock(tsdn, mutex1); - } -} - -static inline void -mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, - uintptr_t key2) { - malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); - malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); - if (mutex1 == mutex2) { - malloc_mutex_unlock(tsdn, mutex1); - } else { - malloc_mutex_unlock(tsdn, mutex1); - malloc_mutex_unlock(tsdn, mutex2); - } - - mutex_pool_assert_not_held(tsdn, pool); -} - -static inline void -mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); -} - -#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4cebb6e..0c29321 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -57,7 +57,6 @@ enum witness_rank_e { WITNESS_RANK_EDATA_CACHE, - WITNESS_RANK_EMAP, WITNESS_RANK_RTREE, WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9ec953a..a66ca36 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -69,7 +69,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 210204a..0c8e6c7 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -91,9 +91,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 171b95f..94fcd7b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -69,7 +69,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 210204a..0c8e6c7 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -91,9 +91,6 @@ Source Files - - Source Files - Source Files diff --git a/src/emap.c b/src/emap.c index 1cc4fc8..e37fea3 100644 --- a/src/emap.c +++ b/src/emap.c @@ -12,17 +12,7 @@ typedef enum emap_lock_result_e emap_lock_result_t; bool emap_init(emap_t *emap, base_t *base, bool zeroed) { - bool err; - err = rtree_new(&emap->rtree, base, zeroed); - if (err) { - return true; - } - err = mutex_pool_init(&emap->mtx_pool, "emap_mutex_pool", - WITNESS_RANK_EMAP); - if (err) { - return true; - } - return false; + return rtree_new(&emap->rtree, base, zeroed); } void diff --git a/src/mutex_pool.c b/src/mutex_pool.c deleted file mode 100644 index d7861dc..0000000 --- a/src/mutex_pool.c +++ /dev/null @@ -1,17 +0,0 @@ - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" - -bool -mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) { - for (int i = 0; i < MUTEX_POOL_SIZE; ++i) { - if (malloc_mutex_init(&pool->mutexes[i], name, rank, - malloc_mutex_address_ordered)) { - return true; - } - } - return false; -} -- cgit v0.12 From ce68f326b0c6bc5f2ba126a9cc8afef3f8a70039 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Mar 2021 16:09:37 -0700 Subject: Avoid the release & re-acquire of the ecache locks around the merge hook. --- src/extent.c | 72 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/extent.c b/src/extent.c index 1748d98..c2b8790 100644 --- a/src/extent.c +++ b/src/extent.c @@ -22,7 +22,7 @@ static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool holding_core_locks); /* Used exclusively for gdump triggering. 
*/ static atomic_zu_t curpages; @@ -39,9 +39,9 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); + ecache_t *ecache, edata_t *edata, bool *coalesced); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained); + ecache_t *ecache, edata_t *edata); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit); @@ -68,7 +68,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, - edata, &coalesced, false); + edata, &coalesced); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { @@ -136,7 +136,7 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, pac, ehooks, ecache, edata, false); + extent_record(tsdn, pac, ehooks, ecache, edata); } edata_t * @@ -574,8 +574,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, pac, ehooks, ecache, edata, - growing_retained); + extent_record(tsdn, pac, ehooks, ecache, edata); return NULL; } } @@ -664,11 +663,11 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - lead, true); + lead); } if (trail != NULL) { - extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, trail, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + trail); } } else { /* @@ -681,7 +680,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - to_salvage, true); + to_salvage); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, pac, to_leak); @@ -695,7 +694,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, edata, true); + &pac->ecache_retained, edata); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -793,15 +792,13 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { + edata_t *inner, edata_t *outer, bool forward) { extent_assert_can_coalesce(inner, outer); eset_remove(&ecache->eset, outer); - malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, - forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &ecache->mtx); - + forward ? inner : outer, forward ? 
outer : inner, + /* holding_core_locks */ true); if (err) { extent_deactivate_locked(tsdn, pac, ecache, outer); } @@ -811,8 +808,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, - bool inactive_only) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -831,7 +827,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); if (next != NULL) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - next, true, growing_retained)) { + next, true)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -846,7 +842,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); if (prev != NULL) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false, growing_retained)) { + prev, false)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -866,16 +862,16 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced, growing_retained, false); + coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced, growing_retained, true); + coalesced); } /* Purge a single extent to retained / unmapped directly. */ @@ -906,7 +902,7 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained) { + ecache_t *ecache, edata_t *edata) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -917,7 +913,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - NULL, growing_retained); + NULL); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ @@ -925,7 +921,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, - ecache, edata, &coalesced, growing_retained); + ecache, edata, &coalesced); } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) @@ -1020,8 +1016,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata, - false); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); } void @@ -1178,11 +1173,17 @@ extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, - edata_t *b, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(edata_base_get(a) < edata_base_get(b)); + edata_t *b, bool holding_core_locks) { + /* Only the expanding path may merge w/o holding ecache locks. */ + if (holding_core_locks) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } else { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } + assert(edata_base_get(a) < edata_base_get(b)); assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); emap_assert_mapped(tsdn, pac->emap, a); @@ -1222,7 +1223,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, pac, ehooks, a, b, false); + return extent_merge_impl(tsdn, pac, ehooks, a, b, + /* holding_core_locks */ false); } bool -- cgit v0.12 From 9b523c6c15814e6662a1f659576996e047b7f965 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Mar 2021 16:55:22 -0700 Subject: Refactor the locking in extent_recycle(). Hold the ecache lock across extent_recycle_extract() and extent_recycle_split(), so that the extent_deactivate after the split can avoid re-taking the ecache mutex.
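As a rough sketch of the locking shape this change moves to (hypothetical wrapper name and abbreviated logic; the real extent_recycle() also handles commit/zero and is not reproduced here), the caller now takes the ecache mutex once and keeps it held across both phases, so split leftovers can be put back via extent_deactivate_locked() without a drop and re-take:

    static edata_t *
    recycle_locked_sketch(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
        ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment,
        bool growing_retained) {
    	/* Taken once; held across both extract and split. */
    	malloc_mutex_lock(tsdn, &ecache->mtx);
    	edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache,
    	    expand_edata, size, alignment);
    	if (edata != NULL) {
    		edata = extent_recycle_split(tsdn, pac, ehooks, ecache,
    		    expand_edata, size, alignment, edata, growing_retained);
    	}
    	malloc_mutex_unlock(tsdn, &ecache->mtx);
    	return edata;
    }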
--- src/extent.c | 65 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/src/extent.c b/src/extent.c index c2b8790..0400114 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,7 +20,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool holding_core_locks); @@ -229,6 +229,7 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static void extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); @@ -237,13 +238,6 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, } static void -extent_deactivate(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { - malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, pac, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); -} - -static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); @@ -356,10 +350,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(alignment > 0); if (config_debug && expand_edata != NULL) { /* @@ -373,7 +365,6 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(alignment <= PAGE); } - malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, @@ -407,12 +398,9 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, lg_max_fit); } if (edata == NULL) { - malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } - extent_activate_locked(tsdn, pac, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -449,8 +437,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ edata_t **to_leak, edata_t **to_salvage, - edata_t *expand_edata, size_t size, size_t alignment, - bool growing_retained) { + edata_t *expand_edata, size_t size, size_t alignment) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(expand_edata == NULL || leadsize == 0); @@ -468,7 +455,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, - size + trailsize, growing_retained); + size + trailsize, /* holding_core_locks*/ true); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -479,7 +466,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, - trailsize, growing_retained); + trailsize, /* holding_core_locks */ true); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -502,6 +489,8 @@ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); + edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -509,7 +498,7 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - expand_edata, size, alignment, growing_retained); + expand_edata, size, alignment); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -518,16 +507,16 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * leaking the extent. */ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, pac, ecache, to_leak); + extent_deactivate_locked(tsdn, pac, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, pac, ecache, lead); + extent_deactivate_locked(tsdn, pac, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, pac, ecache, trail); + extent_deactivate_locked(tsdn, pac, ecache, trail); } return edata; } else { @@ -541,8 +530,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + /* + * May go down the purge path (which assume no ecache + * locks). Only happens with OOM caused split failures. + */ + malloc_mutex_unlock(tsdn, &ecache->mtx); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); + malloc_mutex_lock(tsdn, &ecache->mtx); } return NULL; } @@ -559,14 +554,18 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + + malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment, growing_retained); + expand_edata, size, alignment); if (edata == NULL) { + malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } edata = extent_recycle_split(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, edata, growing_retained); + malloc_mutex_unlock(tsdn, &ecache->mtx); if (edata == NULL) { return NULL; } @@ -658,7 +657,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior(tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment, /* growing_retained */ true); + size, alignment); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -1111,10 +1110,16 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ static edata_t * extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { assert(edata_size_get(edata) == size_a + size_b); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); + /* Only the shrink path may split w/o holding core locks. */ + if (holding_core_locks) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } else { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } if (ehooks_split_will_fail(ehooks)) { return NULL; @@ -1168,7 +1173,7 @@ edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b) { return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - /* growing_retained */ false); + /* holding_core_locks */ false); } static bool -- cgit v0.12 From 304cdbb132b607cc22ca16eb0e37e4c6d8ecd201 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 30 Mar 2021 14:55:28 -0700 Subject: Fix a prof_recent/prof_sys_thread_name interaction When both of these are enabled, the output format changes slightly. Teach the unit test about the interaction. --- test/unit/prof_recent.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e16a849..9974d10 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -439,6 +439,18 @@ confirm_record(const char *template, const confirm_record_t *records, } ASSERT_CHAR(','); + if (opt_prof_sys_thread_name) { + ASSERT_FORMATTED_STR("\"%s_thread_name\"", + *type); + ASSERT_CHAR(':'); + ASSERT_CHAR('"'); + while (*start != '"') { + ++start; + } + ASSERT_CHAR('"'); + ASSERT_CHAR(','); + } + ASSERT_FORMATTED_STR("\"%s_time\"", *type); ASSERT_CHAR(':'); while (isdigit(*start)) { -- cgit v0.12 From 12cd13cd418512d9e7596921ccdb62e25a103f87 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 30 Mar 2021 15:20:30 -0700 Subject: Fix thread.name/prof_sys_thread_name interaction When prof_sys_thread_name is true, we don't allow setting the thread name. Teach the unit test this. 
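For reference, the knob under test is the "thread.prof.name" mallctl. A minimal sketch of how an application would run into this behavior (assumes a --enable-prof build run with prof and prof_sys_thread_name enabled; on prefixed builds the entry point is je_mallctl; error handling trimmed):

    #include <stdio.h>
    #include <string.h>
    #include <jemalloc/jemalloc.h>

    int main(void) {
        /* With prof_sys_thread_name enabled, the write is expected to fail. */
        const char *name = "worker-0";
        int err = mallctl("thread.prof.name", NULL, NULL,
            (void *)&name, sizeof(name));
        if (err != 0) {
            fprintf(stderr, "thread.prof.name write refused: %s\n",
                strerror(err));
        }

        /* Reads still work; the name is taken from the system thread name. */
        const char *cur;
        size_t sz = sizeof(cur);
        if (mallctl("thread.prof.name", (void *)&cur, &sz, NULL, 0) == 0) {
            printf("thread.prof.name = \"%s\"\n", cur);
        }
        return 0;
    }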
--- test/unit/prof_thread_name.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 4a9d38a..3c4614f 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -22,7 +22,7 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, - "%s():%d: Unexpected mallctl failure reading thread.prof.name", + "%s():%d: Unexpected mallctl failure writing thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); } @@ -33,6 +33,7 @@ TEST_BEGIN(test_prof_thread_name_validation) { const char *thread_name; test_skip_if(!config_prof); + test_skip_if(opt_prof_sys_thread_name); mallctl_thread_name_get(""); mallctl_thread_name_set("hi there"); @@ -94,12 +95,13 @@ thd_start(void *varg) { } TEST_BEGIN(test_prof_thread_name_threaded) { + test_skip_if(!config_prof); + test_skip_if(opt_prof_sys_thread_name); + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; - test_skip_if(!config_prof); - for (i = 0; i < NTHREADS; i++) { thd_args[i] = i; thd_create(&thds[i], thd_start, (void *)&thd_args[i]); -- cgit v0.12 From 4f7cb3a413a966056a6c23eb996ba1d51d0517a3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 May 2021 17:14:47 -0700 Subject: Sized deallocation: fix a typo. dealloction -> deallocation. --- src/safety_check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safety_check.c b/src/safety_check.c index 9747afe..552b312 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -10,7 +10,7 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, safety_check_fail(": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " - "dealloction bugs (source address: %p, %s). Suggest building with " + "deallocation bugs (source address: %p, %s). Suggest building with " "--enable-debug or address sanitizer for debugging. Abort.\n", true_size, input_size, ptr, src); } -- cgit v0.12 From 1f688490e176aafbc3e3529d3025df7fcbce725b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 May 2021 16:51:43 -0700 Subject: Stats: Fix a printing bug when hpa_dirty_mult = -1 Missed a layer of indirection. --- src/stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 69cb2d3..ef17303 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1476,8 +1476,9 @@ stats_general_print(emitter_t *emitter) { * representation. */ if (u32v == (uint32_t)-1) { + const char *neg1 = "-1"; emitter_kv(emitter, "hpa_dirty_mult", - "opt.hpa_dirty_mult", emitter_type_string, "-1"); + "opt.hpa_dirty_mult", emitter_type_string, &neg1); } else { char buf[FXP_BUF_SIZE]; fxp_print(u32v, buf); -- cgit v0.12 From aea91b8c338594daed753c94f33ff32d4b23fdc9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 17 Feb 2021 16:23:24 -0800 Subject: Clean up some minor data structure inconsistencies Namely, unify the include guard styling with the majority of the project, and do flat_bitmap -> fb, to match its naming convention. 
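The rename leaves the flat-bitmap API unchanged. As a quick orientation for the header added below, here is a usage sketch written in the style of the new unit test (illustrative only; it assumes the jemalloc test harness, which supplies the internal headers and the expect_* macros):

    #include "test/jemalloc_test.h"
    #include "jemalloc/internal/fb.h"

    TEST_BEGIN(test_fb_usage_sketch) {
        enum { nbits = 100 };
        fb_group_t fb[FB_NGROUPS(nbits)];
        fb_init(fb, nbits);               /* all bits start unset */
        fb_set_range(fb, nbits, 10, 20);  /* set bits [10, 30) */
        expect_zu_eq(20, fb_scount(fb, nbits, 0, nbits),
            "20 bits should be set");
        expect_zu_eq(30, fb_ffu(fb, nbits, 10),
            "first unset bit at or after 10 should be 30");
        size_t begin, len;
        bool found = fb_srange_iter(fb, nbits, 0, &begin, &len);
        expect_true(found, "one set range should exist");
        expect_zu_eq(10, begin, "set range should start at 10");
        expect_zu_eq(20, len, "set range should be 20 bits long");
    }
    TEST_END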
--- Makefile.in | 2 +- include/jemalloc/internal/eset.h | 2 +- include/jemalloc/internal/fb.h | 373 +++++++++++++ include/jemalloc/internal/flat_bitmap.h | 373 ------------- include/jemalloc/internal/hpdata.h | 2 +- include/jemalloc/internal/ph.h | 8 +- include/jemalloc/internal/rb.h | 8 +- src/hpa.c | 2 +- src/psset.c | 2 +- test/unit/fb.c | 954 ++++++++++++++++++++++++++++++++ test/unit/flat_bitmap.c | 954 -------------------------------- 11 files changed, 1340 insertions(+), 1340 deletions(-) create mode 100644 include/jemalloc/internal/fb.h delete mode 100644 include/jemalloc/internal/flat_bitmap.h create mode 100644 test/unit/fb.c delete mode 100644 test/unit/flat_bitmap.c diff --git a/Makefile.in b/Makefile.in index c00ad0f..130fa1e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -215,7 +215,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ - ${srcroot}test/unit/flat_bitmap.c \ + ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 7b53ecd..ff5e57d 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ESET_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h new file mode 100644 index 0000000..90c4091 --- /dev/null +++ b/include/jemalloc/internal/fb.h @@ -0,0 +1,373 @@ +#ifndef JEMALLOC_INTERNAL_FB_H +#define JEMALLOC_INTERNAL_FB_H + +/* + * The flat bitmap module. This has a larger API relative to the bitmap module + * (supporting things like backwards searches, and searching for both set and + * unset bits), at the cost of slower operations for very large bitmaps. + * + * Initialized flat bitmaps start at all-zeros (all bits unset). + */ + +typedef unsigned long fb_group_t; +#define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) +#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ + + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) + +static inline void +fb_init(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + memset(fb, 0, ngroups * sizeof(fb_group_t)); +} + +static inline bool +fb_empty(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + if (fb[i] != 0) { + return false; + } + } + return true; +} + +static inline bool +fb_full(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + size_t trailing_bits = nbits % FB_GROUP_BITS; + size_t limit = (trailing_bits == 0 ? 
ngroups : ngroups - 1); + for (size_t i = 0; i < limit; i++) { + if (fb[i] != ~(fb_group_t)0) { + return false; + } + } + if (trailing_bits == 0) { + return true; + } + return fb[ngroups - 1] == ((fb_group_t)1 << trailing_bits) - 1; +} + +static inline bool +fb_get(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + return (bool)(fb[group_ind] & ((fb_group_t)1 << bit_ind)); +} + +static inline void +fb_set(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] |= ((fb_group_t)1 << bit_ind); +} + +static inline void +fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] &= ~((fb_group_t)1 << bit_ind); +} + + +/* + * Some implementation details. This visitation function lets us apply a group + * visitor to each group in the bitmap (potentially modifying it). The mask + * indicates which bits are logically part of the visitation. + */ +typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); +JEMALLOC_ALWAYS_INLINE void +fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, + size_t start, size_t cnt) { + assert(cnt > 0); + assert(start + cnt <= nbits); + size_t group_ind = start / FB_GROUP_BITS; + size_t start_bit_ind = start % FB_GROUP_BITS; + /* + * The first group is special; it's the only one we don't start writing + * to from bit 0. + */ + size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS + ? FB_GROUP_BITS - start_bit_ind : cnt); + /* + * We can basically split affected words into: + * - The first group, where we touch only the high bits + * - The last group, where we touch only the low bits + * - The middle, where we set all the bits to the same thing. + * We treat each case individually. The last two could be merged, but + * this can lead to bad codegen for those middle words. + */ + /* First group */ + fb_group_t mask = ((~(fb_group_t)0) + >> (FB_GROUP_BITS - first_group_cnt)) + << start_bit_ind; + visit(ctx, &fb[group_ind], mask); + + cnt -= first_group_cnt; + group_ind++; + /* Middle groups */ + while (cnt > FB_GROUP_BITS) { + visit(ctx, &fb[group_ind], ~(fb_group_t)0); + cnt -= FB_GROUP_BITS; + group_ind++; + } + /* Last group */ + if (cnt != 0) { + mask = (~(fb_group_t)0) >> (FB_GROUP_BITS - cnt); + visit(ctx, &fb[group_ind], mask); + } +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + bool val = *(bool *)ctx; + if (val) { + *fb |= mask; + } else { + *fb &= ~mask; + } +} + +/* Sets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + bool val = true; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); +} + +/* Unsets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + bool val = false; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); +} + +JEMALLOC_ALWAYS_INLINE void +fb_scount_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + size_t *scount = (size_t *)ctx; + *scount += popcount_lu(*fb & mask); +} + +/* Finds the number of set bit in the of length cnt starting at start. 
*/ +JEMALLOC_ALWAYS_INLINE size_t +fb_scount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = 0; + fb_visit_impl(fb, nbits, &fb_scount_visitor, &scount, start, cnt); + return scount; +} + +/* Finds the number of unset bit in the of length cnt starting at start. */ +JEMALLOC_ALWAYS_INLINE size_t +fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = fb_scount(fb, nbits, start, cnt); + return cnt - scount; +} + +/* + * An implementation detail; find the first bit at position >= min_bit with the + * value val. + * + * Returns the number of bits in the bitmap if no such bit exists. + */ +JEMALLOC_ALWAYS_INLINE ssize_t +fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, + bool forward) { + assert(start < nbits); + size_t ngroups = FB_NGROUPS(nbits); + ssize_t group_ind = start / FB_GROUP_BITS; + size_t bit_ind = start % FB_GROUP_BITS; + + fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); + + fb_group_t group = fb[group_ind]; + group ^= maybe_invert; + if (forward) { + /* Only keep ones in bits bit_ind and above. */ + group &= ~((1LU << bit_ind) - 1); + } else { + /* + * Only keep ones in bits bit_ind and below. You might more + * naturally express this as (1 << (bit_ind + 1)) - 1, but + * that shifts by an invalid amount if bit_ind is one less than + * FB_GROUP_BITS. + */ + group &= ((2LU << bit_ind) - 1); + } + ssize_t group_ind_bound = forward ? (ssize_t)ngroups : -1; + while (group == 0) { + group_ind += forward ? 1 : -1; + if (group_ind == group_ind_bound) { + return forward ? (ssize_t)nbits : (ssize_t)-1; + } + group = fb[group_ind]; + group ^= maybe_invert; + } + assert(group != 0); + size_t bit = forward ? ffs_lu(group) : fls_lu(group); + size_t pos = group_ind * FB_GROUP_BITS + bit; + /* + * The high bits of a partially filled last group are zeros, so if we're + * looking for zeros we don't want to report an invalid result. + */ + if (forward && !val && pos > nbits) { + return nbits; + } + return pos; +} + +/* + * Find the first set bit in the bitmap with an index >= min_bit. Returns the + * number of bits in the bitmap if no such bit exists. + */ +static inline size_t +fb_ffu(fb_group_t *fb, size_t nbits, size_t min_bit) { + return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ false, + /* forward */ true); +} + +/* The same, but looks for an unset bit. */ +static inline size_t +fb_ffs(fb_group_t *fb, size_t nbits, size_t min_bit) { + return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ true, + /* forward */ true); +} + +/* + * Find the last set bit in the bitmap with an index <= max_bit. Returns -1 if + * no such bit exists. + */ +static inline ssize_t +fb_flu(fb_group_t *fb, size_t nbits, size_t max_bit) { + return fb_find_impl(fb, nbits, max_bit, /* val */ false, + /* forward */ false); +} + +static inline ssize_t +fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) { + return fb_find_impl(fb, nbits, max_bit, /* val */ true, + /* forward */ false); +} + +/* Returns whether or not we found a range. */ +JEMALLOC_ALWAYS_INLINE bool +fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + assert(start < nbits); + ssize_t next_range_begin = fb_find_impl(fb, nbits, start, val, forward); + if ((forward && next_range_begin == (ssize_t)nbits) + || (!forward && next_range_begin == (ssize_t)-1)) { + return false; + } + /* Half open range; the set bits are [begin, end). 
*/ + ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, + forward); + if (forward) { + *r_begin = next_range_begin; + *r_len = next_range_end - next_range_begin; + } else { + *r_begin = next_range_end + 1; + *r_len = next_range_begin - next_range_end; + } + return true; +} + +/* + * Used to iterate through ranges of set bits. + * + * Tries to find the next contiguous sequence of set bits with a first index >= + * start. If one exists, puts the earliest bit of the range in *r_begin, its + * length in *r_len, and returns true. Otherwise, returns false (without + * touching *r_begin or *r_end). + */ +static inline bool +fb_srange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ true); +} + +/* + * The same as fb_srange_iter, but searches backwards from start rather than + * forwards. (The position returned is still the earliest bit in the range). + */ +static inline bool +fb_srange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ false); +} + +/* Similar to fb_srange_iter, but searches for unset bits. */ +static inline bool +fb_urange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ true); +} + +/* Similar to fb_srange_riter, but searches for unset bits. */ +static inline bool +fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ false); +} + +JEMALLOC_ALWAYS_INLINE size_t +fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { + size_t begin = 0; + size_t longest_len = 0; + size_t len = 0; + while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, + &len, val, /* forward */ true)) { + if (len > longest_len) { + longest_len = len; + } + begin += len; + } + return longest_len; +} + +static inline size_t +fb_srange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ true); +} + +static inline size_t +fb_urange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ false); +} + +/* + * Initializes each bit of dst with the bitwise-AND of the corresponding bits of + * src1 and src2. All bitmaps must be the same size. + */ +static inline void +fb_bit_and(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] & src2[i]; + } +} + +/* Like fb_bit_and, but with bitwise-OR. */ +static inline void +fb_bit_or(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] | src2[i]; + } +} + +/* Initializes dst bit i to the negation of source bit i. 
*/ +static inline void +fb_bit_not(fb_group_t *dst, fb_group_t *src, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = ~src[i]; + } +} + +#endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h deleted file mode 100644 index 90c4091..0000000 --- a/include/jemalloc/internal/flat_bitmap.h +++ /dev/null @@ -1,373 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_FB_H -#define JEMALLOC_INTERNAL_FB_H - -/* - * The flat bitmap module. This has a larger API relative to the bitmap module - * (supporting things like backwards searches, and searching for both set and - * unset bits), at the cost of slower operations for very large bitmaps. - * - * Initialized flat bitmaps start at all-zeros (all bits unset). - */ - -typedef unsigned long fb_group_t; -#define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) -#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ - + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) - -static inline void -fb_init(fb_group_t *fb, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - memset(fb, 0, ngroups * sizeof(fb_group_t)); -} - -static inline bool -fb_empty(fb_group_t *fb, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - for (size_t i = 0; i < ngroups; i++) { - if (fb[i] != 0) { - return false; - } - } - return true; -} - -static inline bool -fb_full(fb_group_t *fb, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - size_t trailing_bits = nbits % FB_GROUP_BITS; - size_t limit = (trailing_bits == 0 ? ngroups : ngroups - 1); - for (size_t i = 0; i < limit; i++) { - if (fb[i] != ~(fb_group_t)0) { - return false; - } - } - if (trailing_bits == 0) { - return true; - } - return fb[ngroups - 1] == ((fb_group_t)1 << trailing_bits) - 1; -} - -static inline bool -fb_get(fb_group_t *fb, size_t nbits, size_t bit) { - assert(bit < nbits); - size_t group_ind = bit / FB_GROUP_BITS; - size_t bit_ind = bit % FB_GROUP_BITS; - return (bool)(fb[group_ind] & ((fb_group_t)1 << bit_ind)); -} - -static inline void -fb_set(fb_group_t *fb, size_t nbits, size_t bit) { - assert(bit < nbits); - size_t group_ind = bit / FB_GROUP_BITS; - size_t bit_ind = bit % FB_GROUP_BITS; - fb[group_ind] |= ((fb_group_t)1 << bit_ind); -} - -static inline void -fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { - assert(bit < nbits); - size_t group_ind = bit / FB_GROUP_BITS; - size_t bit_ind = bit % FB_GROUP_BITS; - fb[group_ind] &= ~((fb_group_t)1 << bit_ind); -} - - -/* - * Some implementation details. This visitation function lets us apply a group - * visitor to each group in the bitmap (potentially modifying it). The mask - * indicates which bits are logically part of the visitation. - */ -typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); -JEMALLOC_ALWAYS_INLINE void -fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, - size_t start, size_t cnt) { - assert(cnt > 0); - assert(start + cnt <= nbits); - size_t group_ind = start / FB_GROUP_BITS; - size_t start_bit_ind = start % FB_GROUP_BITS; - /* - * The first group is special; it's the only one we don't start writing - * to from bit 0. - */ - size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS - ? FB_GROUP_BITS - start_bit_ind : cnt); - /* - * We can basically split affected words into: - * - The first group, where we touch only the high bits - * - The last group, where we touch only the low bits - * - The middle, where we set all the bits to the same thing. 
- * We treat each case individually. The last two could be merged, but - * this can lead to bad codegen for those middle words. - */ - /* First group */ - fb_group_t mask = ((~(fb_group_t)0) - >> (FB_GROUP_BITS - first_group_cnt)) - << start_bit_ind; - visit(ctx, &fb[group_ind], mask); - - cnt -= first_group_cnt; - group_ind++; - /* Middle groups */ - while (cnt > FB_GROUP_BITS) { - visit(ctx, &fb[group_ind], ~(fb_group_t)0); - cnt -= FB_GROUP_BITS; - group_ind++; - } - /* Last group */ - if (cnt != 0) { - mask = (~(fb_group_t)0) >> (FB_GROUP_BITS - cnt); - visit(ctx, &fb[group_ind], mask); - } -} - -JEMALLOC_ALWAYS_INLINE void -fb_assign_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { - bool val = *(bool *)ctx; - if (val) { - *fb |= mask; - } else { - *fb &= ~mask; - } -} - -/* Sets the cnt bits starting at position start. Must not have a 0 count. */ -static inline void -fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - bool val = true; - fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); -} - -/* Unsets the cnt bits starting at position start. Must not have a 0 count. */ -static inline void -fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - bool val = false; - fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); -} - -JEMALLOC_ALWAYS_INLINE void -fb_scount_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { - size_t *scount = (size_t *)ctx; - *scount += popcount_lu(*fb & mask); -} - -/* Finds the number of set bit in the of length cnt starting at start. */ -JEMALLOC_ALWAYS_INLINE size_t -fb_scount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - size_t scount = 0; - fb_visit_impl(fb, nbits, &fb_scount_visitor, &scount, start, cnt); - return scount; -} - -/* Finds the number of unset bit in the of length cnt starting at start. */ -JEMALLOC_ALWAYS_INLINE size_t -fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - size_t scount = fb_scount(fb, nbits, start, cnt); - return cnt - scount; -} - -/* - * An implementation detail; find the first bit at position >= min_bit with the - * value val. - * - * Returns the number of bits in the bitmap if no such bit exists. - */ -JEMALLOC_ALWAYS_INLINE ssize_t -fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, - bool forward) { - assert(start < nbits); - size_t ngroups = FB_NGROUPS(nbits); - ssize_t group_ind = start / FB_GROUP_BITS; - size_t bit_ind = start % FB_GROUP_BITS; - - fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); - - fb_group_t group = fb[group_ind]; - group ^= maybe_invert; - if (forward) { - /* Only keep ones in bits bit_ind and above. */ - group &= ~((1LU << bit_ind) - 1); - } else { - /* - * Only keep ones in bits bit_ind and below. You might more - * naturally express this as (1 << (bit_ind + 1)) - 1, but - * that shifts by an invalid amount if bit_ind is one less than - * FB_GROUP_BITS. - */ - group &= ((2LU << bit_ind) - 1); - } - ssize_t group_ind_bound = forward ? (ssize_t)ngroups : -1; - while (group == 0) { - group_ind += forward ? 1 : -1; - if (group_ind == group_ind_bound) { - return forward ? (ssize_t)nbits : (ssize_t)-1; - } - group = fb[group_ind]; - group ^= maybe_invert; - } - assert(group != 0); - size_t bit = forward ? ffs_lu(group) : fls_lu(group); - size_t pos = group_ind * FB_GROUP_BITS + bit; - /* - * The high bits of a partially filled last group are zeros, so if we're - * looking for zeros we don't want to report an invalid result. 
- */ - if (forward && !val && pos > nbits) { - return nbits; - } - return pos; -} - -/* - * Find the first set bit in the bitmap with an index >= min_bit. Returns the - * number of bits in the bitmap if no such bit exists. - */ -static inline size_t -fb_ffu(fb_group_t *fb, size_t nbits, size_t min_bit) { - return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ false, - /* forward */ true); -} - -/* The same, but looks for an unset bit. */ -static inline size_t -fb_ffs(fb_group_t *fb, size_t nbits, size_t min_bit) { - return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ true, - /* forward */ true); -} - -/* - * Find the last set bit in the bitmap with an index <= max_bit. Returns -1 if - * no such bit exists. - */ -static inline ssize_t -fb_flu(fb_group_t *fb, size_t nbits, size_t max_bit) { - return fb_find_impl(fb, nbits, max_bit, /* val */ false, - /* forward */ false); -} - -static inline ssize_t -fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) { - return fb_find_impl(fb, nbits, max_bit, /* val */ true, - /* forward */ false); -} - -/* Returns whether or not we found a range. */ -JEMALLOC_ALWAYS_INLINE bool -fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len, bool val, bool forward) { - assert(start < nbits); - ssize_t next_range_begin = fb_find_impl(fb, nbits, start, val, forward); - if ((forward && next_range_begin == (ssize_t)nbits) - || (!forward && next_range_begin == (ssize_t)-1)) { - return false; - } - /* Half open range; the set bits are [begin, end). */ - ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, - forward); - if (forward) { - *r_begin = next_range_begin; - *r_len = next_range_end - next_range_begin; - } else { - *r_begin = next_range_end + 1; - *r_len = next_range_begin - next_range_end; - } - return true; -} - -/* - * Used to iterate through ranges of set bits. - * - * Tries to find the next contiguous sequence of set bits with a first index >= - * start. If one exists, puts the earliest bit of the range in *r_begin, its - * length in *r_len, and returns true. Otherwise, returns false (without - * touching *r_begin or *r_end). - */ -static inline bool -fb_srange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len) { - return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, - /* val */ true, /* forward */ true); -} - -/* - * The same as fb_srange_iter, but searches backwards from start rather than - * forwards. (The position returned is still the earliest bit in the range). - */ -static inline bool -fb_srange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len) { - return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, - /* val */ true, /* forward */ false); -} - -/* Similar to fb_srange_iter, but searches for unset bits. */ -static inline bool -fb_urange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len) { - return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, - /* val */ false, /* forward */ true); -} - -/* Similar to fb_srange_riter, but searches for unset bits. 
*/ -static inline bool -fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len) { - return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, - /* val */ false, /* forward */ false); -} - -JEMALLOC_ALWAYS_INLINE size_t -fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { - size_t begin = 0; - size_t longest_len = 0; - size_t len = 0; - while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, - &len, val, /* forward */ true)) { - if (len > longest_len) { - longest_len = len; - } - begin += len; - } - return longest_len; -} - -static inline size_t -fb_srange_longest(fb_group_t *fb, size_t nbits) { - return fb_range_longest_impl(fb, nbits, /* val */ true); -} - -static inline size_t -fb_urange_longest(fb_group_t *fb, size_t nbits) { - return fb_range_longest_impl(fb, nbits, /* val */ false); -} - -/* - * Initializes each bit of dst with the bitwise-AND of the corresponding bits of - * src1 and src2. All bitmaps must be the same size. - */ -static inline void -fb_bit_and(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - for (size_t i = 0; i < ngroups; i++) { - dst[i] = src1[i] & src2[i]; - } -} - -/* Like fb_bit_and, but with bitwise-OR. */ -static inline void -fb_bit_or(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - for (size_t i = 0; i < ngroups; i++) { - dst[i] = src1[i] | src2[i]; - } -} - -/* Initializes dst bit i to the negation of source bit i. */ -static inline void -fb_bit_not(fb_group_t *dst, fb_group_t *src, size_t nbits) { - size_t ngroups = FB_NGROUPS(nbits); - for (size_t i = 0; i < ngroups; i++) { - dst[i] = ~src[i]; - } -} - -#endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 245116b..4ff2e57 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPDATA_H #define JEMALLOC_INTERNAL_HPDATA_H -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 84d6778..63aeac9 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_PH_H +#define JEMALLOC_INTERNAL_PH_H + /* * A Pairing Heap implementation. * @@ -12,9 +15,6 @@ ******************************************************************************* */ -#ifndef PH_H_ -#define PH_H_ - /* Node structure. 
*/ #define phn(a_type) \ struct { \ @@ -388,4 +388,4 @@ a_prefix##remove(a_ph_type *ph, a_type *phn) { \ } \ } -#endif /* PH_H_ */ +#endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 47fa5ca..dfc705a 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_RB_H +#define JEMALLOC_INTERNAL_RB_H + /*- ******************************************************************************* * @@ -19,9 +22,6 @@ ******************************************************************************* */ -#ifndef RB_H_ -#define RB_H_ - #ifndef __PGI #define RB_COMPACT #endif @@ -1003,4 +1003,4 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ rbtree->rbt_root = NULL; \ } -#endif /* RB_H_ */ +#endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/src/hpa.c b/src/hpa.c index a234e6c..22cf007 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/hpa.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) diff --git a/src/psset.c b/src/psset.c index c4053ef..5978202 100644 --- a/src/psset.c +++ b/src/psset.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/psset.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" void psset_init(psset_t *psset) { diff --git a/test/unit/fb.c b/test/unit/fb.c new file mode 100644 index 0000000..d5126f6 --- /dev/null +++ b/test/unit/fb.c @@ -0,0 +1,954 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/fb.h" +#include "test/nbits.h" + +static void +do_test_init(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + /* Junk fb's contents. */ + memset(fb, 99, sz); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + expect_false(fb_get(fb, nbits, i), + "bitmap should start empty"); + } + free(fb); +} + +TEST_BEGIN(test_fb_init) { +#define NB(nbits) \ + do_test_init(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +do_test_get_set_unset(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + /* Set the bits divisible by 3. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + } + /* Check them. */ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + /* Unset those divisible by 5. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 5 == 0) { + fb_unset(fb, nbits, i); + } + } + /* Check them. */ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0 && i % 5 != 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + free(fb); +} + +TEST_BEGIN(test_get_set_unset) { +#define NB(nbits) \ + do_test_get_set_unset(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static ssize_t +find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { + for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { + bool expected_bit = i % 3 == 0 || i % 5 == 0; + if (expected_bit == bit) { + return i; + } + } + return forward ? (ssize_t)nbits : (ssize_t)-1; +} + +static void +do_test_search_simple(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + /* We pick multiples of 3 or 5. 
*/ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + /* This tests double-setting a little, too. */ + if (i % 5 == 0) { + fb_set(fb, nbits, i); + } + } + for (size_t i = 0; i < nbits; i++) { + size_t ffs_compute = find_3_5_compute(i, nbits, true, true); + size_t ffs_search = fb_ffs(fb, nbits, i); + expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); + + ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); + size_t fls_search = fb_fls(fb, nbits, i); + expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); + + size_t ffu_compute = find_3_5_compute(i, nbits, false, true); + size_t ffu_search = fb_ffu(fb, nbits, i); + expect_zu_eq(ffu_compute, ffu_search, "ffu mismatch at %zu", i); + + size_t flu_compute = find_3_5_compute(i, nbits, false, false); + size_t flu_search = fb_flu(fb, nbits, i); + expect_zu_eq(flu_compute, flu_search, "flu mismatch at %zu", i); + } + + free(fb); +} + +TEST_BEGIN(test_search_simple) { +#define NB(nbits) \ + do_test_search_simple(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, + size_t nbits, size_t special_bit, size_t position) { + if (position < special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(special_bit, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } else if (position == special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } else { + /* position > special_bit. 
*/ + expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(nbits, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } +} + +static void +do_test_search_exhaustive(size_t nbits) { + /* This test is quadratic; let's not get too big. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + for (size_t j = 0; j < nbits; j++) { + expect_exhaustive_results(full, empty, nbits, i, j); + } + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + +TEST_BEGIN(test_search_exhaustive) { +#define NB(nbits) \ + do_test_search_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +TEST_BEGIN(test_range_simple) { + /* + * Just pick a constant big enough to have nontrivial middle sizes, and + * big enough that usages of things like weirdnum (below) near the + * beginning fit comfortably into the beginning of the bitmap. 
+ */ + size_t nbits = 64 * 10; + size_t ngroups = FB_NGROUPS(nbits); + fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set_range(fb, nbits, i, 1); + } + } + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 2 == 0, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + fb_set_range(fb, nbits, 0, nbits / 2); + fb_unset_range(fb, nbits, nbits / 2, nbits / 2); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i < nbits / 2, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + + static const size_t weirdnum = 7; + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, weirdnum, FB_GROUP_BITS + weirdnum); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(7 <= i && i <= 2 * weirdnum + FB_GROUP_BITS - 1, + !fb_get(fb, nbits, i), "mismatch at position %zu", i); + } + free(fb); +} +TEST_END + +static void +do_test_empty_full_exhaustive(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + expect_true(fb_full(full, nbits), ""); + expect_false(fb_empty(full, nbits), ""); + expect_false(fb_full(empty, nbits), ""); + expect_true(fb_empty(empty, nbits), ""); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + expect_false(fb_empty(empty, nbits), "error at bit %zu", i); + if (nbits != 1) { + expect_false(fb_full(empty, nbits), + "error at bit %zu", i); + expect_false(fb_empty(full, nbits), + "error at bit %zu", i); + } else { + expect_true(fb_full(empty, nbits), + "error at bit %zu", i); + expect_true(fb_empty(full, nbits), + "error at bit %zu", i); + } + expect_false(fb_full(full, nbits), "error at bit %zu", i); + + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + +TEST_BEGIN(test_empty_full) { +#define NB(nbits) \ + do_test_empty_full_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +/* + * This tests both iter_range and the longest range functionality, which is + * built closely on top of it. + */ +TEST_BEGIN(test_iter_range_simple) { + size_t set_limit = 30; + size_t nbits = 100; + fb_group_t fb[FB_NGROUPS(100)]; + + fb_init(fb, nbits); + + /* + * Failing to initialize these can lead to build failures with -Wall; + * the compiler can't prove that they're set. + */ + size_t begin = (size_t)-1; + size_t len = (size_t)-1; + bool result; + + /* A set of checks with only the first set_limit bits *set*. 
*/ + fb_set_range(fb, nbits, 0, set_limit); + expect_zu_eq(set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(nbits - set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); + for (size_t i = 0; i < set_limit; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + } + for (size_t i = set_limit; i < nbits; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); + } + + /* A set of checks with only the first set_limit bits *unset*. 
*/ + fb_unset_range(fb, nbits, 0, set_limit); + fb_set_range(fb, nbits, set_limit, nbits - set_limit); + expect_zu_eq(nbits - set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); + for (size_t i = 0; i < set_limit; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should not have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); + } + for (size_t i = set_limit; i < nbits; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); + } + +} +TEST_END + +/* + * Doing this bit-by-bit is too slow for a real implementation, but for testing + * code, it's easy to get right. In the exhaustive tests, we'll compare the + * (fast but tricky) real implementation against the (slow but simple) testing + * one. + */ +static bool +fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1); + ssize_t range_begin = (ssize_t)start; + for (; range_begin != (ssize_t)nbits && range_begin != -1; + range_begin += stride) { + if (fb_get(fb, nbits, range_begin) == val) { + ssize_t range_end = range_begin; + for (; range_end != (ssize_t)nbits && range_end != -1; + range_end += stride) { + if (fb_get(fb, nbits, range_end) != val) { + break; + } + } + if (forward) { + *r_begin = range_begin; + *r_len = range_end - range_begin; + } else { + *r_begin = range_end + 1; + *r_len = range_begin - range_end; + } + return true; + } + } + return false; +} + +/* Similar, but for finding longest ranges. 
*/ +static size_t +fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { + size_t longest_so_far = 0; + for (size_t begin = 0; begin < nbits; begin++) { + if (fb_get(fb, nbits, begin) != val) { + continue; + } + size_t end = begin + 1; + for (; end < nbits; end++) { + if (fb_get(fb, nbits, end) != val) { + break; + } + } + if (end - begin > longest_so_far) { + longest_so_far = end - begin; + } + } + return longest_so_far; +} + +static void +expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, + bool val, bool forward) { + bool iter_res; + size_t iter_begin; + size_t iter_len; + if (val) { + if (forward) { + iter_res = fb_srange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_srange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } else { + if (forward) { + iter_res = fb_urange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_urange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } + + bool simple_iter_res; + /* + * These are dead stores, but the compiler can't always figure that out + * statically, and warns on the uninitialized variable. + */ + size_t simple_iter_begin = 0; + size_t simple_iter_len = 0; + simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, + &simple_iter_len, val, forward); + + expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); + if (iter_res && simple_iter_res) { + assert_zu_eq(iter_begin, simple_iter_begin, + "Begin mismatch at %zu", pos); + expect_zu_eq(iter_len, simple_iter_len, + "Length mismatch at %zu", pos); + } +} + +static void +expect_iter_results(fb_group_t *fb, size_t nbits) { + for (size_t i = 0; i < nbits; i++) { + expect_iter_results_at(fb, nbits, i, false, false); + expect_iter_results_at(fb, nbits, i, false, true); + expect_iter_results_at(fb, nbits, i, true, false); + expect_iter_results_at(fb, nbits, i, true, true); + } + expect_zu_eq(fb_range_longest_simple(fb, nbits, true), + fb_srange_longest(fb, nbits), "Longest range mismatch"); + expect_zu_eq(fb_range_longest_simple(fb, nbits, false), + fb_urange_longest(fb, nbits), "Longest range mismatch"); +} + +static void +set_pattern_3(fb_group_t *fb, size_t nbits, bool zero_val) { + for (size_t i = 0; i < nbits; i++) { + if ((i % 6 < 3 && zero_val) || (i % 6 >= 3 && !zero_val)) { + fb_set(fb, nbits, i); + } else { + fb_unset(fb, nbits, i); + } + } +} + +static void +do_test_iter_range_exhaustive(size_t nbits) { + /* This test is also pretty slow. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ true); + expect_iter_results(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ false); + expect_iter_results(fb, nbits); + + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); + expect_iter_results(fb, nbits); + + fb_unset_range(fb, nbits, 0, nbits); + fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1: nbits / 2); + expect_iter_results(fb, nbits); + + free(fb); +} + +/* + * Like test_iter_range_simple, this tests both iteration and longest-range + * computation. + */ +TEST_BEGIN(test_iter_range_exhaustive) { +#define NB(nbits) \ + do_test_iter_range_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +/* + * If all set bits in the bitmap are contiguous, in [set_start, set_end), + * returns the number of set bits in [scount_start, scount_end). 
+ */ +static size_t +scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, + size_t scount_end) { + /* No overlap. */ + if (set_end <= scount_start || scount_end <= set_start) { + return 0; + } + /* set range contains scount range */ + if (set_start <= scount_start && set_end >= scount_end) { + return scount_end - scount_start; + } + /* scount range contains set range. */ + if (scount_start <= set_start && scount_end >= set_end) { + return set_end - set_start; + } + /* Partial overlap, with set range starting first. */ + if (set_start < scount_start && set_end < scount_end) { + return set_end - scount_start; + } + /* Partial overlap, with scount range starting first. */ + if (scount_start < set_start && scount_end < set_end) { + return scount_end - set_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. + */ + unreachable(); +} + +static size_t +ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, + size_t ucount_end) { + /* No overlap. */ + if (set_end <= ucount_start || ucount_end <= set_start) { + return ucount_end - ucount_start; + } + /* set range contains ucount range */ + if (set_start <= ucount_start && set_end >= ucount_end) { + return 0; + } + /* ucount range contains set range. */ + if (ucount_start <= set_start && ucount_end >= set_end) { + return (ucount_end - ucount_start) - (set_end - set_start); + } + /* Partial overlap, with set range starting first. */ + if (set_start < ucount_start && set_end < ucount_end) { + return ucount_end - set_end; + } + /* Partial overlap, with ucount range starting first. */ + if (ucount_start < set_start && ucount_end < set_end) { + return set_start - ucount_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. + */ + unreachable(); +} + +static void +expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, + size_t set_end) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t scount_expected = scount_contiguous(set_start, + set_end, i, j); + size_t scount_computed = fb_scount(fb, nbits, i, cnt); + expect_zu_eq(scount_expected, scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + size_t ucount_expected = ucount_contiguous(set_start, + set_end, i, j); + size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); + assert_zu_eq(ucount_expected, ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + } + } +} + +static void +do_test_count_contiguous(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + + fb_init(fb, nbits); + + expect_count_match_contiguous(fb, nbits, 0, 0); + for (size_t i = 0; i < nbits; i++) { + fb_set(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, 0, i + 1); + } + + for (size_t i = 0; i < nbits; i++) { + fb_unset(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, i + 1, nbits); + } + + free(fb); +} + +TEST_BEGIN(test_count_contiguous_simple) { + enum {nbits = 300}; + fb_group_t fb[FB_NGROUPS(nbits)]; + fb_init(fb, nbits); + /* Just an arbitrary number. 
*/ + size_t start = 23; + + fb_set_range(fb, nbits, start, 30 - start); + expect_count_match_contiguous(fb, nbits, start, 30); + + fb_set_range(fb, nbits, start, 40 - start); + expect_count_match_contiguous(fb, nbits, start, 40); + + fb_set_range(fb, nbits, start, 70 - start); + expect_count_match_contiguous(fb, nbits, start, 70); + + fb_set_range(fb, nbits, start, 120 - start); + expect_count_match_contiguous(fb, nbits, start, 120); + + fb_set_range(fb, nbits, start, 150 - start); + expect_count_match_contiguous(fb, nbits, start, 150); + + fb_set_range(fb, nbits, start, 200 - start); + expect_count_match_contiguous(fb, nbits, start, 200); + + fb_set_range(fb, nbits, start, 290 - start); + expect_count_match_contiguous(fb, nbits, start, 290); +} +TEST_END + +TEST_BEGIN(test_count_contiguous) { +#define NB(nbits) \ + /* This test is *particularly* slow in debug builds. */ \ + if ((!config_debug && nbits < 300) || nbits < 150) { \ + do_test_count_contiguous(nbits); \ + } + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, + size_t nbits) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t odd_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t odd_scount_computed = fb_scount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_scount, odd_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t odd_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_ucount, odd_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t even_scount_computed = fb_scount(fb_even, nbits, + i, j - i); + assert_zu_eq(even_scount, even_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t even_ucount_computed = fb_ucount(fb_even, nbits, + i, j - i); + assert_zu_eq(even_ucount, even_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + } + } +} + +static void +do_test_count_alternating(size_t nbits) { + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb_even = malloc(sz); + fb_group_t *fb_odd = malloc(sz); + + fb_init(fb_even, nbits); + fb_init(fb_odd, nbits); + + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set(fb_even, nbits, i); + } else { + fb_set(fb_odd, nbits, i); + } + } + + expect_count_match_alternating(fb_even, fb_odd, nbits); + + free(fb_even); + free(fb_odd); +} + +TEST_BEGIN(test_count_alternating) { +#define NB(nbits) \ + do_test_count_alternating(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), + void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb1 = malloc(sz); + fb_group_t *fb2 = malloc(sz); + fb_group_t *fb_result = malloc(sz); + fb_init(fb1, nbits); + fb_init(fb2, nbits); + fb_init(fb_result, nbits); + + /* Just two random numbers. 
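+	 * They seed two PRNG streams; after fb_op is applied, the loop below
+	 * resets the states and replays the same streams to re-derive the
+	 * expected bit values for verification.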
*/ + const uint64_t prng_init1 = (uint64_t)0X4E9A9DE6A35691CDULL; + const uint64_t prng_init2 = (uint64_t)0X7856E396B063C36EULL; + + uint64_t prng1 = prng_init1; + uint64_t prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + if (bit1) { + fb_set(fb1, nbits, i); + } + if (bit2) { + fb_set(fb2, nbits, i); + } + + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + fb_op(fb_result, fb1, fb2, nbits); + + /* Reset the prngs to replay them. */ + prng1 = prng_init1; + prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + /* Original bitmaps shouldn't change. */ + expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); + expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); + + /* New one should be bitwise and. */ + expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), + "difference at bit %zu", i); + + /* Update the same way we did last time. */ + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + free(fb1); + free(fb2); + free(fb_result); +} + +static bool +binary_and(bool a, bool b) { + return a & b; +} + +static void +do_test_bit_and(size_t nbits) { + do_test_bit_op(nbits, &binary_and, &fb_bit_and); +} + +TEST_BEGIN(test_bit_and) { +#define NB(nbits) \ + do_test_bit_and(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_or(bool a, bool b) { + return a | b; +} + +static void +do_test_bit_or(size_t nbits) { + do_test_bit_op(nbits, &binary_or, &fb_bit_or); +} + +TEST_BEGIN(test_bit_or) { +#define NB(nbits) \ + do_test_bit_or(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_not(bool a, bool b) { + (void)b; + return !a; +} + +static void +fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, + size_t nbits) { + (void)src2; + fb_bit_not(dst, src1, nbits); +} + +static void +do_test_bit_not(size_t nbits) { + do_test_bit_op(nbits, &binary_not, &fb_bit_not_shim); +} + +TEST_BEGIN(test_bit_not) { +#define NB(nbits) \ + do_test_bit_not(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_fb_init, + test_get_set_unset, + test_search_simple, + test_search_exhaustive, + test_range_simple, + test_empty_full, + test_iter_range_simple, + test_iter_range_exhaustive, + test_count_contiguous_simple, + test_count_contiguous, + test_count_alternating, + test_bit_and, + test_bit_or, + test_bit_not); +} diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c deleted file mode 100644 index 6b0bcc3..0000000 --- a/test/unit/flat_bitmap.c +++ /dev/null @@ -1,954 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/flat_bitmap.h" -#include "test/nbits.h" - -static void -do_test_init(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb = malloc(sz); - /* Junk fb's contents. 
*/ - memset(fb, 99, sz); - fb_init(fb, nbits); - for (size_t i = 0; i < nbits; i++) { - expect_false(fb_get(fb, nbits, i), - "bitmap should start empty"); - } - free(fb); -} - -TEST_BEGIN(test_fb_init) { -#define NB(nbits) \ - do_test_init(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static void -do_test_get_set_unset(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb = malloc(sz); - fb_init(fb, nbits); - /* Set the bits divisible by 3. */ - for (size_t i = 0; i < nbits; i++) { - if (i % 3 == 0) { - fb_set(fb, nbits, i); - } - } - /* Check them. */ - for (size_t i = 0; i < nbits; i++) { - expect_b_eq(i % 3 == 0, fb_get(fb, nbits, i), - "Unexpected bit at position %zu", i); - } - /* Unset those divisible by 5. */ - for (size_t i = 0; i < nbits; i++) { - if (i % 5 == 0) { - fb_unset(fb, nbits, i); - } - } - /* Check them. */ - for (size_t i = 0; i < nbits; i++) { - expect_b_eq(i % 3 == 0 && i % 5 != 0, fb_get(fb, nbits, i), - "Unexpected bit at position %zu", i); - } - free(fb); -} - -TEST_BEGIN(test_get_set_unset) { -#define NB(nbits) \ - do_test_get_set_unset(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static ssize_t -find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { - for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { - bool expected_bit = i % 3 == 0 || i % 5 == 0; - if (expected_bit == bit) { - return i; - } - } - return forward ? (ssize_t)nbits : (ssize_t)-1; -} - -static void -do_test_search_simple(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb = malloc(sz); - fb_init(fb, nbits); - - /* We pick multiples of 3 or 5. */ - for (size_t i = 0; i < nbits; i++) { - if (i % 3 == 0) { - fb_set(fb, nbits, i); - } - /* This tests double-setting a little, too. 
*/ - if (i % 5 == 0) { - fb_set(fb, nbits, i); - } - } - for (size_t i = 0; i < nbits; i++) { - size_t ffs_compute = find_3_5_compute(i, nbits, true, true); - size_t ffs_search = fb_ffs(fb, nbits, i); - expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); - - ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); - size_t fls_search = fb_fls(fb, nbits, i); - expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); - - size_t ffu_compute = find_3_5_compute(i, nbits, false, true); - size_t ffu_search = fb_ffu(fb, nbits, i); - expect_zu_eq(ffu_compute, ffu_search, "ffu mismatch at %zu", i); - - size_t flu_compute = find_3_5_compute(i, nbits, false, false); - size_t flu_search = fb_flu(fb, nbits, i); - expect_zu_eq(flu_compute, flu_search, "flu mismatch at %zu", i); - } - - free(fb); -} - -TEST_BEGIN(test_search_simple) { -#define NB(nbits) \ - do_test_search_simple(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static void -expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, - size_t nbits, size_t special_bit, size_t position) { - if (position < special_bit) { - expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(-1, fb_fls(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - - expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position, fb_fls(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(special_bit, fb_ffu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(-1, fb_flu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - } else if (position == special_bit) { - expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); - - expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, - position), "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position, fb_flu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - } else { - /* position > special_bit. 
*/ - expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - - expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position, fb_fls(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(nbits, fb_ffu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(special_bit, fb_flu(mostly_full, nbits, position), - "mismatch at %zu, %zu", position, special_bit); - } -} - -static void -do_test_search_exhaustive(size_t nbits) { - /* This test is quadratic; let's not get too big. */ - if (nbits > 1000) { - return; - } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *empty = malloc(sz); - fb_init(empty, nbits); - fb_group_t *full = malloc(sz); - fb_init(full, nbits); - fb_set_range(full, nbits, 0, nbits); - - for (size_t i = 0; i < nbits; i++) { - fb_set(empty, nbits, i); - fb_unset(full, nbits, i); - - for (size_t j = 0; j < nbits; j++) { - expect_exhaustive_results(full, empty, nbits, i, j); - } - fb_unset(empty, nbits, i); - fb_set(full, nbits, i); - } - - free(empty); - free(full); -} - -TEST_BEGIN(test_search_exhaustive) { -#define NB(nbits) \ - do_test_search_exhaustive(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -TEST_BEGIN(test_range_simple) { - /* - * Just pick a constant big enough to have nontrivial middle sizes, and - * big enough that usages of things like weirdnum (below) near the - * beginning fit comfortably into the beginning of the bitmap. 
- */ - size_t nbits = 64 * 10; - size_t ngroups = FB_NGROUPS(nbits); - fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); - fb_init(fb, nbits); - for (size_t i = 0; i < nbits; i++) { - if (i % 2 == 0) { - fb_set_range(fb, nbits, i, 1); - } - } - for (size_t i = 0; i < nbits; i++) { - expect_b_eq(i % 2 == 0, fb_get(fb, nbits, i), - "mismatch at position %zu", i); - } - fb_set_range(fb, nbits, 0, nbits / 2); - fb_unset_range(fb, nbits, nbits / 2, nbits / 2); - for (size_t i = 0; i < nbits; i++) { - expect_b_eq(i < nbits / 2, fb_get(fb, nbits, i), - "mismatch at position %zu", i); - } - - static const size_t weirdnum = 7; - fb_set_range(fb, nbits, 0, nbits); - fb_unset_range(fb, nbits, weirdnum, FB_GROUP_BITS + weirdnum); - for (size_t i = 0; i < nbits; i++) { - expect_b_eq(7 <= i && i <= 2 * weirdnum + FB_GROUP_BITS - 1, - !fb_get(fb, nbits, i), "mismatch at position %zu", i); - } - free(fb); -} -TEST_END - -static void -do_test_empty_full_exhaustive(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *empty = malloc(sz); - fb_init(empty, nbits); - fb_group_t *full = malloc(sz); - fb_init(full, nbits); - fb_set_range(full, nbits, 0, nbits); - - expect_true(fb_full(full, nbits), ""); - expect_false(fb_empty(full, nbits), ""); - expect_false(fb_full(empty, nbits), ""); - expect_true(fb_empty(empty, nbits), ""); - - for (size_t i = 0; i < nbits; i++) { - fb_set(empty, nbits, i); - fb_unset(full, nbits, i); - - expect_false(fb_empty(empty, nbits), "error at bit %zu", i); - if (nbits != 1) { - expect_false(fb_full(empty, nbits), - "error at bit %zu", i); - expect_false(fb_empty(full, nbits), - "error at bit %zu", i); - } else { - expect_true(fb_full(empty, nbits), - "error at bit %zu", i); - expect_true(fb_empty(full, nbits), - "error at bit %zu", i); - } - expect_false(fb_full(full, nbits), "error at bit %zu", i); - - fb_unset(empty, nbits, i); - fb_set(full, nbits, i); - } - - free(empty); - free(full); -} - -TEST_BEGIN(test_empty_full) { -#define NB(nbits) \ - do_test_empty_full_exhaustive(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -/* - * This tests both iter_range and the longest range functionality, which is - * built closely on top of it. - */ -TEST_BEGIN(test_iter_range_simple) { - size_t set_limit = 30; - size_t nbits = 100; - fb_group_t fb[FB_NGROUPS(100)]; - - fb_init(fb, nbits); - - /* - * Failing to initialize these can lead to build failures with -Wall; - * the compiler can't prove that they're set. - */ - size_t begin = (size_t)-1; - size_t len = (size_t)-1; - bool result; - - /* A set of checks with only the first set_limit bits *set*. 
*/ - fb_set_range(fb, nbits, 0, set_limit); - expect_zu_eq(set_limit, fb_srange_longest(fb, nbits), - "Incorrect longest set range"); - expect_zu_eq(nbits - set_limit, fb_urange_longest(fb, nbits), - "Incorrect longest unset range"); - for (size_t i = 0; i < set_limit; i++) { - result = fb_srange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(i, begin, "Incorrect begin at %zu", i); - expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); - - result = fb_urange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); - expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); - - result = fb_srange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(0, begin, "Incorrect begin at %zu", i); - expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); - - result = fb_urange_riter(fb, nbits, i, &begin, &len); - expect_false(result, "Should not have found a range at %zu", i); - } - for (size_t i = set_limit; i < nbits; i++) { - result = fb_srange_iter(fb, nbits, i, &begin, &len); - expect_false(result, "Should not have found a range at %zu", i); - - result = fb_urange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(i, begin, "Incorrect begin at %zu", i); - expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); - - result = fb_srange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(0, begin, "Incorrect begin at %zu", i); - expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); - - result = fb_urange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); - expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); - } - - /* A set of checks with only the first set_limit bits *unset*. 
*/ - fb_unset_range(fb, nbits, 0, set_limit); - fb_set_range(fb, nbits, set_limit, nbits - set_limit); - expect_zu_eq(nbits - set_limit, fb_srange_longest(fb, nbits), - "Incorrect longest set range"); - expect_zu_eq(set_limit, fb_urange_longest(fb, nbits), - "Incorrect longest unset range"); - for (size_t i = 0; i < set_limit; i++) { - result = fb_srange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); - expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); - - result = fb_urange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(i, begin, "Incorrect begin at %zu", i); - expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); - - result = fb_srange_riter(fb, nbits, i, &begin, &len); - expect_false(result, "Should not have found a range at %zu", i); - - result = fb_urange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should not have found a range at %zu", i); - expect_zu_eq(0, begin, "Incorrect begin at %zu", i); - expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); - } - for (size_t i = set_limit; i < nbits; i++) { - result = fb_srange_iter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(i, begin, "Incorrect begin at %zu", i); - expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); - - result = fb_urange_iter(fb, nbits, i, &begin, &len); - expect_false(result, "Should not have found a range at %zu", i); - - result = fb_srange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); - expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); - - result = fb_urange_riter(fb, nbits, i, &begin, &len); - expect_true(result, "Should have found a range at %zu", i); - expect_zu_eq(0, begin, "Incorrect begin at %zu", i); - expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); - } - -} -TEST_END - -/* - * Doing this bit-by-bit is too slow for a real implementation, but for testing - * code, it's easy to get right. In the exhaustive tests, we'll compare the - * (fast but tricky) real implementation against the (slow but simple) testing - * one. - */ -static bool -fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, - size_t *r_len, bool val, bool forward) { - ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1); - ssize_t range_begin = (ssize_t)start; - for (; range_begin != (ssize_t)nbits && range_begin != -1; - range_begin += stride) { - if (fb_get(fb, nbits, range_begin) == val) { - ssize_t range_end = range_begin; - for (; range_end != (ssize_t)nbits && range_end != -1; - range_end += stride) { - if (fb_get(fb, nbits, range_end) != val) { - break; - } - } - if (forward) { - *r_begin = range_begin; - *r_len = range_end - range_begin; - } else { - *r_begin = range_end + 1; - *r_len = range_begin - range_end; - } - return true; - } - } - return false; -} - -/* Similar, but for finding longest ranges. 
*/ -static size_t -fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { - size_t longest_so_far = 0; - for (size_t begin = 0; begin < nbits; begin++) { - if (fb_get(fb, nbits, begin) != val) { - continue; - } - size_t end = begin + 1; - for (; end < nbits; end++) { - if (fb_get(fb, nbits, end) != val) { - break; - } - } - if (end - begin > longest_so_far) { - longest_so_far = end - begin; - } - } - return longest_so_far; -} - -static void -expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, - bool val, bool forward) { - bool iter_res; - size_t iter_begin; - size_t iter_len; - if (val) { - if (forward) { - iter_res = fb_srange_iter(fb, nbits, pos, - &iter_begin, &iter_len); - } else { - iter_res = fb_srange_riter(fb, nbits, pos, - &iter_begin, &iter_len); - } - } else { - if (forward) { - iter_res = fb_urange_iter(fb, nbits, pos, - &iter_begin, &iter_len); - } else { - iter_res = fb_urange_riter(fb, nbits, pos, - &iter_begin, &iter_len); - } - } - - bool simple_iter_res; - /* - * These are dead stores, but the compiler can't always figure that out - * statically, and warns on the uninitialized variable. - */ - size_t simple_iter_begin = 0; - size_t simple_iter_len = 0; - simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, - &simple_iter_len, val, forward); - - expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); - if (iter_res && simple_iter_res) { - assert_zu_eq(iter_begin, simple_iter_begin, - "Begin mismatch at %zu", pos); - expect_zu_eq(iter_len, simple_iter_len, - "Length mismatch at %zu", pos); - } -} - -static void -expect_iter_results(fb_group_t *fb, size_t nbits) { - for (size_t i = 0; i < nbits; i++) { - expect_iter_results_at(fb, nbits, i, false, false); - expect_iter_results_at(fb, nbits, i, false, true); - expect_iter_results_at(fb, nbits, i, true, false); - expect_iter_results_at(fb, nbits, i, true, true); - } - expect_zu_eq(fb_range_longest_simple(fb, nbits, true), - fb_srange_longest(fb, nbits), "Longest range mismatch"); - expect_zu_eq(fb_range_longest_simple(fb, nbits, false), - fb_urange_longest(fb, nbits), "Longest range mismatch"); -} - -static void -set_pattern_3(fb_group_t *fb, size_t nbits, bool zero_val) { - for (size_t i = 0; i < nbits; i++) { - if ((i % 6 < 3 && zero_val) || (i % 6 >= 3 && !zero_val)) { - fb_set(fb, nbits, i); - } else { - fb_unset(fb, nbits, i); - } - } -} - -static void -do_test_iter_range_exhaustive(size_t nbits) { - /* This test is also pretty slow. */ - if (nbits > 1000) { - return; - } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb = malloc(sz); - fb_init(fb, nbits); - - set_pattern_3(fb, nbits, /* zero_val */ true); - expect_iter_results(fb, nbits); - - set_pattern_3(fb, nbits, /* zero_val */ false); - expect_iter_results(fb, nbits); - - fb_set_range(fb, nbits, 0, nbits); - fb_unset_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); - expect_iter_results(fb, nbits); - - fb_unset_range(fb, nbits, 0, nbits); - fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1: nbits / 2); - expect_iter_results(fb, nbits); - - free(fb); -} - -/* - * Like test_iter_range_simple, this tests both iteration and longest-range - * computation. - */ -TEST_BEGIN(test_iter_range_exhaustive) { -#define NB(nbits) \ - do_test_iter_range_exhaustive(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -/* - * If all set bits in the bitmap are contiguous, in [set_start, set_end), - * returns the number of set bits in [scount_start, scount_end). 
- */ -static size_t -scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, - size_t scount_end) { - /* No overlap. */ - if (set_end <= scount_start || scount_end <= set_start) { - return 0; - } - /* set range contains scount range */ - if (set_start <= scount_start && set_end >= scount_end) { - return scount_end - scount_start; - } - /* scount range contains set range. */ - if (scount_start <= set_start && scount_end >= set_end) { - return set_end - set_start; - } - /* Partial overlap, with set range starting first. */ - if (set_start < scount_start && set_end < scount_end) { - return set_end - scount_start; - } - /* Partial overlap, with scount range starting first. */ - if (scount_start < set_start && scount_end < set_end) { - return scount_end - set_start; - } - /* - * Trigger an assert failure; the above list should have been - * exhaustive. - */ - unreachable(); -} - -static size_t -ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, - size_t ucount_end) { - /* No overlap. */ - if (set_end <= ucount_start || ucount_end <= set_start) { - return ucount_end - ucount_start; - } - /* set range contains ucount range */ - if (set_start <= ucount_start && set_end >= ucount_end) { - return 0; - } - /* ucount range contains set range. */ - if (ucount_start <= set_start && ucount_end >= set_end) { - return (ucount_end - ucount_start) - (set_end - set_start); - } - /* Partial overlap, with set range starting first. */ - if (set_start < ucount_start && set_end < ucount_end) { - return ucount_end - set_end; - } - /* Partial overlap, with ucount range starting first. */ - if (ucount_start < set_start && ucount_end < set_end) { - return set_start - ucount_start; - } - /* - * Trigger an assert failure; the above list should have been - * exhaustive. - */ - unreachable(); -} - -static void -expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, - size_t set_end) { - for (size_t i = 0; i < nbits; i++) { - for (size_t j = i + 1; j <= nbits; j++) { - size_t cnt = j - i; - size_t scount_expected = scount_contiguous(set_start, - set_end, i, j); - size_t scount_computed = fb_scount(fb, nbits, i, cnt); - expect_zu_eq(scount_expected, scount_computed, - "fb_scount error with nbits=%zu, start=%zu, " - "cnt=%zu, with bits set in [%zu, %zu)", - nbits, i, cnt, set_start, set_end); - - size_t ucount_expected = ucount_contiguous(set_start, - set_end, i, j); - size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); - assert_zu_eq(ucount_expected, ucount_computed, - "fb_ucount error with nbits=%zu, start=%zu, " - "cnt=%zu, with bits set in [%zu, %zu)", - nbits, i, cnt, set_start, set_end); - - } - } -} - -static void -do_test_count_contiguous(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb = malloc(sz); - - fb_init(fb, nbits); - - expect_count_match_contiguous(fb, nbits, 0, 0); - for (size_t i = 0; i < nbits; i++) { - fb_set(fb, nbits, i); - expect_count_match_contiguous(fb, nbits, 0, i + 1); - } - - for (size_t i = 0; i < nbits; i++) { - fb_unset(fb, nbits, i); - expect_count_match_contiguous(fb, nbits, i + 1, nbits); - } - - free(fb); -} - -TEST_BEGIN(test_count_contiguous_simple) { - enum {nbits = 300}; - fb_group_t fb[FB_NGROUPS(nbits)]; - fb_init(fb, nbits); - /* Just an arbitrary number. 
*/ - size_t start = 23; - - fb_set_range(fb, nbits, start, 30 - start); - expect_count_match_contiguous(fb, nbits, start, 30); - - fb_set_range(fb, nbits, start, 40 - start); - expect_count_match_contiguous(fb, nbits, start, 40); - - fb_set_range(fb, nbits, start, 70 - start); - expect_count_match_contiguous(fb, nbits, start, 70); - - fb_set_range(fb, nbits, start, 120 - start); - expect_count_match_contiguous(fb, nbits, start, 120); - - fb_set_range(fb, nbits, start, 150 - start); - expect_count_match_contiguous(fb, nbits, start, 150); - - fb_set_range(fb, nbits, start, 200 - start); - expect_count_match_contiguous(fb, nbits, start, 200); - - fb_set_range(fb, nbits, start, 290 - start); - expect_count_match_contiguous(fb, nbits, start, 290); -} -TEST_END - -TEST_BEGIN(test_count_contiguous) { -#define NB(nbits) \ - /* This test is *particularly* slow in debug builds. */ \ - if ((!config_debug && nbits < 300) || nbits < 150) { \ - do_test_count_contiguous(nbits); \ - } - NBITS_TAB -#undef NB -} -TEST_END - -static void -expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, - size_t nbits) { - for (size_t i = 0; i < nbits; i++) { - for (size_t j = i + 1; j <= nbits; j++) { - size_t cnt = j - i; - size_t odd_scount = cnt / 2 - + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t odd_scount_computed = fb_scount(fb_odd, nbits, - i, j - i); - assert_zu_eq(odd_scount, odd_scount_computed, - "fb_scount error with nbits=%zu, start=%zu, " - "cnt=%zu, with alternating bits set.", - nbits, i, j - i); - - size_t odd_ucount = cnt / 2 - + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, - i, j - i); - assert_zu_eq(odd_ucount, odd_ucount_computed, - "fb_ucount error with nbits=%zu, start=%zu, " - "cnt=%zu, with alternating bits set.", - nbits, i, j - i); - - size_t even_scount = cnt / 2 - + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t even_scount_computed = fb_scount(fb_even, nbits, - i, j - i); - assert_zu_eq(even_scount, even_scount_computed, - "fb_scount error with nbits=%zu, start=%zu, " - "cnt=%zu, with alternating bits set.", - nbits, i, j - i); - - size_t even_ucount = cnt / 2 - + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t even_ucount_computed = fb_ucount(fb_even, nbits, - i, j - i); - assert_zu_eq(even_ucount, even_ucount_computed, - "fb_ucount error with nbits=%zu, start=%zu, " - "cnt=%zu, with alternating bits set.", - nbits, i, j - i); - } - } -} - -static void -do_test_count_alternating(size_t nbits) { - if (nbits > 1000) { - return; - } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb_even = malloc(sz); - fb_group_t *fb_odd = malloc(sz); - - fb_init(fb_even, nbits); - fb_init(fb_odd, nbits); - - for (size_t i = 0; i < nbits; i++) { - if (i % 2 == 0) { - fb_set(fb_even, nbits, i); - } else { - fb_set(fb_odd, nbits, i); - } - } - - expect_count_match_alternating(fb_even, fb_odd, nbits); - - free(fb_even); - free(fb_odd); -} - -TEST_BEGIN(test_count_alternating) { -#define NB(nbits) \ - do_test_count_alternating(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static void -do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), - void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); - fb_group_t *fb1 = malloc(sz); - fb_group_t *fb2 = malloc(sz); - fb_group_t *fb_result = malloc(sz); - fb_init(fb1, nbits); - fb_init(fb2, nbits); - fb_init(fb_result, nbits); - - /* Just two random numbers. 
*/ - const uint64_t prng_init1 = (uint64_t)0X4E9A9DE6A35691CDULL; - const uint64_t prng_init2 = (uint64_t)0X7856E396B063C36EULL; - - uint64_t prng1 = prng_init1; - uint64_t prng2 = prng_init2; - - for (size_t i = 0; i < nbits; i++) { - bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); - bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); - - if (bit1) { - fb_set(fb1, nbits, i); - } - if (bit2) { - fb_set(fb2, nbits, i); - } - - if (i % 64 == 0) { - prng1 = prng_state_next_u64(prng1); - prng2 = prng_state_next_u64(prng2); - } - } - - fb_op(fb_result, fb1, fb2, nbits); - - /* Reset the prngs to replay them. */ - prng1 = prng_init1; - prng2 = prng_init2; - - for (size_t i = 0; i < nbits; i++) { - bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); - bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); - - /* Original bitmaps shouldn't change. */ - expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); - expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); - - /* New one should be bitwise and. */ - expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), - "difference at bit %zu", i); - - /* Update the same way we did last time. */ - if (i % 64 == 0) { - prng1 = prng_state_next_u64(prng1); - prng2 = prng_state_next_u64(prng2); - } - } - - free(fb1); - free(fb2); - free(fb_result); -} - -static bool -binary_and(bool a, bool b) { - return a & b; -} - -static void -do_test_bit_and(size_t nbits) { - do_test_bit_op(nbits, &binary_and, &fb_bit_and); -} - -TEST_BEGIN(test_bit_and) { -#define NB(nbits) \ - do_test_bit_and(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static bool -binary_or(bool a, bool b) { - return a | b; -} - -static void -do_test_bit_or(size_t nbits) { - do_test_bit_op(nbits, &binary_or, &fb_bit_or); -} - -TEST_BEGIN(test_bit_or) { -#define NB(nbits) \ - do_test_bit_or(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -static bool -binary_not(bool a, bool b) { - (void)b; - return !a; -} - -static void -fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, - size_t nbits) { - (void)src2; - fb_bit_not(dst, src1, nbits); -} - -static void -do_test_bit_not(size_t nbits) { - do_test_bit_op(nbits, &binary_not, &fb_bit_not_shim); -} - -TEST_BEGIN(test_bit_not) { -#define NB(nbits) \ - do_test_bit_not(nbits); - NBITS_TAB -#undef NB -} -TEST_END - -int -main(void) { - return test_no_reentrancy( - test_fb_init, - test_get_set_unset, - test_search_simple, - test_search_exhaustive, - test_range_simple, - test_empty_full, - test_iter_range_simple, - test_iter_range_exhaustive, - test_count_contiguous_simple, - test_count_contiguous, - test_count_alternating, - test_bit_and, - test_bit_or, - test_bit_not); -} -- cgit v0.12 From b2c08ef2e62a72951488c1603113b2d3881bd9d6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 4 Mar 2021 15:08:41 -0800 Subject: RB unit tests: don't test reentrantly. The RB code doesn't do any allocation, and takes a little bit of time to run. There's no sense in doing everything three times. --- test/unit/rb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/rb.c b/test/unit/rb.c index 2509a6d..a594fb7 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -349,7 +349,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_rb_empty, test_rb_random); } -- cgit v0.12 From 5417938215384d9373d290ba30d5dcccc5db5c80 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 16 Mar 2021 18:08:04 -0700 Subject: Red-black tree: add summarize/filter. 
This allows tracking extra information in the nodes of an red-black tree to filter searches in the tree to just those that match some property. --- include/jemalloc/internal/rb.h | 912 +++++++++++++++++++++++++++++++++++++++-- test/unit/rb.c | 740 +++++++++++++++++++++++++++++++-- 2 files changed, 1583 insertions(+), 69 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index dfc705a..a9a51cb 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -26,6 +26,15 @@ #define RB_COMPACT #endif +/* + * Each node in the RB tree consumes at least 1 byte of space (for the linkage + * if nothing else, so there are a maximum of sizeof(void *) << 3 rb tree nodes + * in any process (and thus, at most sizeof(void *) << 3 nodes in any rb tree). + * The choice of algorithm bounds the depth of a tree to twice the binary log of + * the number of elements in the tree; the following bound follows. + */ +#define RB_MAX_DEPTH (sizeof(void *) << 4) + #ifdef RB_COMPACT /* Node structure. */ #define rb_node(a_type) \ @@ -159,12 +168,22 @@ struct { \ rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ } while (0) +#define rb_summarized_only_false(...) +#define rb_summarized_only_true(...) __VA_ARGS__ +#define rb_empty_summarize(a_node, a_lchild, a_rchild) false + /* - * The rb_proto() macro generates function prototypes that correspond to the - * functions generated by an equivalently parameterized call to rb_gen(). + * The rb_proto() and rb_summarized_proto() macros generate function prototypes + * that correspond to the functions generated by an equivalently parameterized + * call to rb_gen() or rb_summarized_gen(), respectively. */ #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ + rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, false) +#define rb_summarized_proto(a_attr, a_prefix, a_rbt_type, a_type) \ + rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, true) +#define rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, \ + a_is_summarized) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ a_attr bool \ @@ -195,31 +214,94 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ a_attr void \ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ - void *arg); + void *arg); \ +/* Extended API */ \ +rb_summarized_only_##a_is_summarized( \ +a_attr void \ +a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node); \ +a_attr bool \ +a_prefix##empty_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##first_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##last_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + 
bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +) /* * The rb_gen() macro generates a type-specific red-black tree implementation, * based on the above cpp macros. - * * Arguments: * - * a_attr : Function attribute for generated functions (ex: static). - * a_prefix : Prefix for generated functions (ex: ex_). - * a_rb_type : Type for red-black tree data structure (ex: ex_t). - * a_type : Type for red-black tree node data structure (ex: ex_node_t). - * a_field : Name of red-black tree node linkage (ex: ex_link). - * a_cmp : Node comparison function name, with the following prototype: - * int (a_cmp *)(a_type *a_node, a_type *a_other); - * ^^^^^^ - * or a_key - * Interpretation of comparison function return values: - * -1 : a_node < a_other - * 0 : a_node == a_other - * 1 : a_node > a_other - * In all cases, the a_node or a_key macro argument is the first - * argument to the comparison function, which makes it possible - * to write comparison functions that treat the first argument - * specially. + * a_attr: + * Function attribute for generated functions (ex: static). + * a_prefix: + * Prefix for generated functions (ex: ex_). + * a_rb_type: + * Type for red-black tree data structure (ex: ex_t). + * a_type: + * Type for red-black tree node data structure (ex: ex_node_t). + * a_field: + * Name of red-black tree node linkage (ex: ex_link). + * a_cmp: + * Node comparison function name, with the following prototype: + * + * int a_cmp(a_type *a_node, a_type *a_other); + * ^^^^^^ + * or a_key + * Interpretation of comparison function return values: + * -1 : a_node < a_other + * 0 : a_node == a_other + * 1 : a_node > a_other + * In all cases, the a_node or a_key macro argument is the first argument to + * the comparison function, which makes it possible to write comparison + * functions that treat the first argument specially. a_cmp must be a total + * order on values inserted into the tree -- duplicates are not allowed. * * Assuming the following setup: * @@ -338,8 +420,193 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * during iteration. There is no way to stop iteration once it * has begun. * arg : Opaque pointer passed to cb(). + * + * The rb_summarized_gen() macro generates all the functions above, but has an + * expanded interface. In introduces the notion of summarizing subtrees, and of + * filtering searches in the tree according to the information contained in + * those summaries. 
+ * The extra macro argument is: + * a_summarize: + * Tree summarization function name, with the following prototype: + * + * bool a_summarize(a_type *a_node, const a_type *a_left_child, + * const a_type *a_right_child); + * + * This function should update a_node with the summary of the subtree rooted + * there, using the data contained in it and the summaries in a_left_child + * and a_right_child. One or both of them may be NULL. When the tree + * changes due to an insertion or removal, it updates the summaries of all + * nodes whose subtrees have changed (always updating the summaries of + * children before their parents). If the user alters a node in the tree in + * a way that may change its summary, they can call the generated + * update_summaries function to bubble up the summary changes to the root. + * It should return true if the summary changed (or may have changed), and + * false if it didn't (which will allow the implementation to terminate + * "bubbling up" the summaries early). + * As the parameter names indicate, the children are ordered as they are in + * the tree, a_left_child, if it is not NULL, compares less than a_node, + * which in turn compares less than a_right_child (if a_right_child is not + * NULL). + * + * Using the same setup as above but replacing the macro with + * rb_summarized_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp, + * ex_summarize) + * + * Generates all the previous functions, but adds some more: + * + * static void + * ex_update_summaries(ex_t *tree, ex_node_t *node); + * Description: Recompute all summaries of ancestors of node. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: The element of the tree whose summary may have changed. + * + * For each of ex_empty, ex_first, ex_last, ex_next, ex_prev, ex_search, + * ex_nsearch, ex_psearch, ex_iter, and ex_reverse_iter, an additional function + * is generated as well, with the suffix _filtered (e.g. ex_empty_filtered, + * ex_first_filtered, etc.). These use the concept of a "filter"; a binary + * property some node either satisfies or does not satisfy. Clever use of the + * a_summary argument to rb_summarized_gen can allow efficient computation of + * these predicates across whole subtrees of the tree. + * The extended API functions accept three additional arguments after the + * arguments to the corresponding non-extended equivalent. + * + * ex_fn(..., bool (*filter_node)(void *, ex_node_t *), + * bool (*filter_subtree)(void *, ex_node_t *), void *filter_ctx); + * filter_node : Returns true if the node passes the filter. + * filter_subtree : Returns true if some node in the subtree rooted at + * node passes the filter. + * filter_ctx : A context argument passed to the filters. + * + * For a more concrete example of summarizing and filtering, suppose we're using + * the red-black tree to track a set of integers: + * + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * unsigned data; + * }; + * + * Suppose, for some application-specific reason, we want to be able to quickly + * find numbers in the set which are divisible by large powers of 2 (say, for + * aligned allocation purposes). 
We augment the node with a summary field: + * + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * unsigned data; + * unsigned max_subtree_ffs; + * } + * + * and define our summarization function as follows: + * + * bool + * ex_summarize(ex_node_t *node, const ex_node_t *lchild, + * const ex_node_t *rchild) { + * unsigned new_max_subtree_ffs = ffs(node->data); + * if (lchild != NULL && lchild->max_subtree_ffs > new_max_subtree_ffs) { + * new_max_subtree_ffs = lchild->max_subtree_ffs; + * } + * if (rchild != NULL && rchild->max_subtree_ffs > new_max_subtree_ffs) { + * new_max_subtree_ffs = rchild->max_subtree_ffs; + * } + * bool changed = (node->max_subtree_ffs != new_max_subtree_ffs) + * node->max_subtree_ffs = new_max_subtree_ffs; + * // This could be "return true" without any correctness or big-O + * // performance changes; but practically, precisely reporting summary + * // changes reduces the amount of work that has to be done when "bubbling + * // up" summary changes. + * return changed; + * } + * + * We can now implement our filter functions as follows: + * bool + * ex_filter_node(void *filter_ctx, ex_node_t *node) { + * unsigned required_ffs = *(unsigned *)filter_ctx; + * return ffs(node->data) >= required_ffs; + * } + * bool + * ex_filter_subtree(void *filter_ctx, ex_node_t *node) { + * unsigned required_ffs = *(unsigned *)filter_ctx; + * return node->max_subtree_ffs >= required_ffs; + * } + * + * We can now easily search for, e.g., the smallest integer in the set that's + * divisible by 128: + * ex_node_t * + * find_div_128(ex_tree_t *tree) { + * unsigned min_ffs = 7; + * return ex_first_filtered(tree, &ex_filter_node, &ex_filter_subtree, + * &min_ffs); + * } + * + * We could with similar ease: + * - Fnd the next multiple of 128 in the set that's larger than 12345 (with + * ex_nsearch_filtered) + * - Iterate over just those multiples of 64 that are in the set (with + * ex_iter_filtered) + * - Determine if the set contains any multiples of 1024 (with + * ex_empty_filtered). + * + * Some possibly subtle API notes: + * - The node argument to ex_next_filtered and ex_prev_filtered need not pass + * the filter; it will find the next/prev node that passes the filter. + * - ex_search_filtered will fail even for a node in the tree, if that node does + * not pass the filter. ex_psearch_filtered and ex_nsearch_filtered behave + * similarly; they may return a node larger/smaller than the key, even if a + * node equivalent to the key is in the tree (but does not pass the filter). + * - Similarly, if the start argument to a filtered iteration function does not + * pass the filter, the callback won't be invoked on it. + * + * These should make sense after a moment's reflection; each post-condition is + * the same as with the unfiltered version, with the added constraint that the + * returned node must pass the filter. 
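+ *
+ * As one more illustrative sketch (count_cb and count_div_64 are made-up
+ * helpers, not functions generated by the macros; the sketch only assumes
+ * the ex_* definitions above), counting the multiples of 64 currently in
+ * the set with ex_iter_filtered might look like:
+ *
+ * ex_node_t *
+ * count_cb(ex_t *tree, ex_node_t *node, void *arg) {
+ *     (*(size_t *)arg)++;
+ *     // Returning NULL keeps the iteration going.
+ *     return NULL;
+ * }
+ *
+ * size_t
+ * count_div_64(ex_t *tree) {
+ *     // Same convention as min_ffs = 7 for multiples of 128 above.
+ *     unsigned min_ffs = 6;
+ *     size_t count = 0;
+ *     ex_iter_filtered(tree, NULL, &count_cb, &count, &ex_filter_node,
+ *         &ex_filter_subtree, &min_ffs);
+ *     return count;
+ * }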
*/ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ + rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ + rb_empty_summarize, false) +#define rb_summarized_gen(a_attr, a_prefix, a_rbt_type, a_type, \ + a_field, a_cmp, a_summarize) \ + rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ + a_summarize, true) + +#define rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, \ + a_field, a_cmp, a_summarize, a_is_summarized) \ +typedef struct { \ + a_type *node; \ + int cmp; \ +} a_prefix##path_entry_t; \ +static inline void \ +a_prefix##summarize_range(a_prefix##path_entry_t *rfirst, \ + a_prefix##path_entry_t *rlast) { \ + while ((uintptr_t)rlast >= (uintptr_t)rfirst) { \ + a_type *node = rlast->node; \ + /* Avoid a warning when a_summarize is rb_empty_summarize. */ \ + (void)node; \ + bool changed = a_summarize(node, rbtn_left_get(a_type, a_field, \ + node), rbtn_right_get(a_type, a_field, node)); \ + if (!changed) { \ + break; \ + } \ + rlast--; \ + } \ +} \ +/* On the remove pathways, we sometimes swap the node being removed */\ +/* and its first successor; in such cases we need to do two range */\ +/* updates; one from the node to its (former) swapped successor, the */\ +/* next from that successor to the root (with either allowed to */\ +/* bail out early if appropriate. */\ +static inline void \ +a_prefix##summarize_swapped_range(a_prefix##path_entry_t *rfirst, \ + a_prefix##path_entry_t *rlast, a_prefix##path_entry_t *swap_loc) { \ + if (swap_loc == NULL || rlast <= swap_loc) { \ + a_prefix##summarize_range(rfirst, rlast); \ + } else { \ + a_prefix##summarize_range(swap_loc + 1, rlast); \ + (void)a_summarize(swap_loc->node, \ + rbtn_left_get(a_type, a_field, swap_loc->node), \ + rbtn_right_get(a_type, a_field, swap_loc->node)); \ + a_prefix##summarize_range(rfirst, swap_loc - 1); \ + } \ +} \ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ @@ -465,10 +732,8 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ } \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } path[sizeof(void *) << 4], *pathp; \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp; \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ @@ -484,6 +749,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ } \ } \ pathp->node = node; \ + /* A loop invariant we maintain is that all nodes with */\ + /* out-of-date summaries live in path[0], path[1], ..., *pathp. */\ + /* To maintain this, we have to summarize node, since we */\ + /* decrement pathp before the first iteration. */\ + assert(rbtn_left_get(a_type, a_field, node) == NULL); \ + assert(rbtn_right_get(a_type, a_field, node) == NULL); \ + (void)a_summarize(node, NULL, NULL); \ /* Unwind. 
*/ \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ a_type *cnode = pathp->node; \ @@ -498,9 +770,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ cnode = tnode; \ } \ } else { \ + a_prefix##summarize_range(path, pathp); \ return; \ } \ } else { \ @@ -521,13 +797,20 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_left(a_type, a_field, cnode, tnode); \ rbtn_color_set(a_type, a_field, tnode, tred); \ rbtn_red_set(a_type, a_field, cnode); \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ cnode = tnode; \ } \ } else { \ + a_prefix##summarize_range(path, pathp); \ return; \ } \ } \ pathp->node = cnode; \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ } \ /* Set root, and make it black. */ \ rbtree->rbt_root = path->node; \ @@ -535,12 +818,18 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ } \ a_attr void \ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } *pathp, *nodep, path[sizeof(void *) << 4]; \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp; \ + a_prefix##path_entry_t *nodep; \ + a_prefix##path_entry_t *swap_loc; \ + /* This is a "real" sentinel -- NULL means we didn't swap the */\ + /* node to be pruned with one of its successors, and so */\ + /* summarization can terminate early whenever some summary */\ + /* doesn't change. */\ + swap_loc = NULL; \ + /* This is just to silence a compiler warning. */ \ + nodep = NULL; \ /* Wind. */ \ - nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ @@ -567,6 +856,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp--; \ if (pathp->node != node) { \ /* Swap node with its successor. */ \ + swap_loc = nodep; \ bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ rbtn_color_set(a_type, a_field, pathp->node, \ rbtn_red_get(a_type, a_field, node)); \ @@ -604,6 +894,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, left); \ if (pathp == path) { \ rbtree->rbt_root = left; \ + /* Nothing to summarize -- the subtree rooted at the */\ + /* node's left child hasn't changed, and it's now the */\ + /* root. */\ } else { \ if (pathp[-1].cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp[-1].node, \ @@ -612,6 +905,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ left); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ } \ return; \ } else if (pathp == path) { \ @@ -620,10 +915,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } \ } \ + /* We've now established the invariant that the node has no right */\ + /* child (well, morally; we didn't bother nulling it out if we */\ + /* swapped it with its successor), and that the only nodes with */\ + /* out-of-date summaries live in path[0], path[1], ..., pathp[-1].*/\ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. 
*/ \ assert(pathp[-1].cmp < 0); \ rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ + a_prefix##summarize_swapped_range(path, &pathp[-1], swap_loc); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ @@ -657,6 +957,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp->node, tnode);\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(right, \ + rbtn_left_get(a_type, a_field, right), \ + rbtn_right_get(a_type, a_field, right)); \ } else { \ /* || */\ /* pathp(r) */\ @@ -667,7 +973,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* */\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ + (void)a_summarize(tnode, rbtn_left_get(a_type, a_field, \ + tnode), rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified subtree */\ /* root. */\ assert((uintptr_t)pathp > (uintptr_t)path); \ @@ -678,6 +989,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ return; \ } else { \ a_type *right = rbtn_right_get(a_type, a_field, \ @@ -698,6 +1011,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp->node, tnode);\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(right, \ + rbtn_left_get(a_type, a_field, right), \ + rbtn_right_get(a_type, a_field, right)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root, which may actually be the tree */\ /* root. 
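A pattern worth calling out in the hunks above and below: after every rotation, summaries are refreshed strictly bottom-up, because the new subtree root's summary is computed from its children's already-refreshed summaries. The following is a condensed sketch of the single-rotation case, mirroring the macro body rather than standing alone; the double-rotation hunks additionally refresh the intermediate node, still children first.

/*
 * tnode becomes the new subtree root, so the demoted node is refreshed first
 * and tnode last; reversing the order would fold a stale child summary into
 * the new root.
 */
rbtn_rotate_left(a_type, a_field, node, tnode);
(void)a_summarize(node, rbtn_left_get(a_type, a_field, node),
    rbtn_right_get(a_type, a_field, node));
(void)a_summarize(tnode, rbtn_left_get(a_type, a_field, tnode),
    rbtn_right_get(a_type, a_field, tnode));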
*/\ @@ -712,6 +1034,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, \ pathp[-1].node, tnode); \ } \ + a_prefix##summarize_swapped_range(path, \ + &pathp[-1], swap_loc); \ } \ return; \ } else { \ @@ -725,6 +1049,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_red_set(a_type, a_field, pathp->node); \ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ pathp->node = tnode; \ } \ } \ @@ -757,6 +1087,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ tnode); \ rbtn_right_set(a_type, a_field, unode, tnode); \ rbtn_rotate_left(a_type, a_field, unode, tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(unode, \ + rbtn_left_get(a_type, a_field, unode), \ + rbtn_right_get(a_type, a_field, unode)); \ } else { \ /* || */\ /* pathp(b) */\ @@ -771,7 +1107,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ rbtn_black_set(a_type, a_field, tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified subtree */\ /* root, which may actually be the tree root. */\ if (pathp == path) { \ @@ -785,6 +1127,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ } \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ @@ -803,6 +1147,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root. */\ assert((uintptr_t)pathp > (uintptr_t)path); \ @@ -813,6 +1163,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ return; \ } else { \ /* || */\ @@ -824,6 +1176,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_red_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, pathp->node); \ /* Balance restored. 
*/ \ + a_prefix##summarize_swapped_range(path, pathp, \ + swap_loc); \ return; \ } \ } else { \ @@ -840,6 +1194,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root, which may actually be the tree */\ /* root. */\ @@ -854,6 +1214,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, \ pathp[-1].node, tnode); \ } \ + a_prefix##summarize_swapped_range(path, \ + &pathp[-1], swap_loc); \ } \ return; \ } else { \ @@ -864,6 +1226,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* / */\ /* (b) */\ rbtn_red_set(a_type, a_field, left); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ } \ } \ @@ -1001,6 +1366,491 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ void *arg) { \ a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ rbtree->rbt_root = NULL; \ -} +} \ +/* BEGIN SUMMARIZED-ONLY IMPLEMENTATION */ \ +rb_summarized_only_##a_is_summarized( \ +static inline a_prefix##path_entry_t * \ +a_prefix##wind(a_rbt_type *rbtree, \ + a_prefix##path_entry_t path[RB_MAX_DEPTH], a_type *node) { \ + a_prefix##path_entry_t *pathp; \ + path->node = rbtree->rbt_root; \ + for (pathp = path; ; pathp++) { \ + assert((size_t)(pathp - path) < RB_MAX_DEPTH); \ + pathp->cmp = a_cmp(node, pathp->node); \ + if (pathp->cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else if (pathp->cmp == 0) { \ + return pathp; \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + unreachable(); \ +} \ +a_attr void \ +a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node) { \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp = a_prefix##wind(rbtree, path, node); \ + a_prefix##summarize_range(path, pathp); \ +} \ +a_attr bool \ +a_prefix##empty_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + return node == NULL || !filter_subtree(filter_ctx, node); \ +} \ +static inline a_type * \ +a_prefix##first_filtered_from_node(a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + assert(node != NULL && filter_subtree(filter_ctx, node)); \ + while (true) { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (left != NULL && filter_subtree(filter_ctx, left)) { \ + node = left; \ + } else if (filter_node(filter_ctx, node)) { \ + return node; \ + } else { \ + assert(right != NULL \ + && filter_subtree(filter_ctx, right)); \ + node = right; \ + } \ + } \ + unreachable(); \ +} \ +a_attr a_type * \ +a_prefix##first_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + 
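Continuing the hypothetical `my_tree_`/`my_node_t` sketch from earlier: `update_summaries`, generated just above, is the entry point a caller uses after mutating a field that feeds the summary in place, with no removal and reinsertion needed. For example:

/*
 * The node keeps its key (so its position is unchanged), but its weight
 * feeds the summary, so the path from the node to the root is refreshed.
 */
static void
my_node_set_weight(my_tree_t *tree, my_node_t *node, uint64_t new_weight) {
    node->weight = new_weight;
    my_tree_update_summaries(tree, node);
}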
} \ + return a_prefix##first_filtered_from_node(node, filter_node, \ + filter_subtree, filter_ctx); \ +} \ +static inline a_type * \ +a_prefix##last_filtered_from_node(a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + assert(node != NULL && filter_subtree(filter_ctx, node)); \ + while (true) { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (right != NULL && filter_subtree(filter_ctx, right)) { \ + node = right; \ + } else if (filter_node(filter_ctx, node)) { \ + return node; \ + } else { \ + assert(left != NULL \ + && filter_subtree(filter_ctx, left)); \ + node = left; \ + } \ + } \ + unreachable(); \ +} \ +a_attr a_type * \ +a_prefix##last_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + return a_prefix##last_filtered_from_node(node, filter_node, \ + filter_subtree, filter_ctx); \ +} \ +/* Internal implementation function. Search for a node comparing */\ +/* equal to key matching the filter. If such a node is in the tree, */\ +/* return it. Additionally, the caller has the option to ask for */\ +/* bounds on the next / prev node in the tree passing the filter. */\ +/* If nextbound is true, then this function will do one of the */\ +/* following: */\ +/* - Fill in *nextbound_node with the smallest node in the tree */\ +/* greater than key passing the filter, and NULL-out */\ +/* *nextbound_subtree. */\ +/* - Fill in *nextbound_subtree with a parent of that node which is */\ +/* not a parent of the searched-for node, and NULL-out */\ +/* *nextbound_node. */\ +/* - NULL-out both *nextbound_node and *nextbound_subtree, in which */\ +/* case no node greater than key but passing the filter is in the */\ +/* tree. */\ +/* The prevbound case is similar. If the caller knows that key is in */\ +/* the tree and that the subtree rooted at key does not contain a */\ +/* node satisfying the bound being searched for, then they can pass */\ +/* false for include_subtree, in which case we won't bother searching */\ +/* there (risking a cache miss). */\ +/* */\ +/* This API is unfortunately complex; but the logic for filtered */\ +/* searches is very subtle, and otherwise we would have to repeat it */\ +/* multiple times for filtered search, nsearch, psearch, next, and */\ +/* prev. 
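The comment above describes the internal workhorse; from the caller's side, the filtered API is simpler. A sketch, again using the assumed `my_node_t` max-weight summary (none of these names come from the patch): `filter_subtree` prunes whole subtrees via the summary, `filter_node` accepts individual nodes, and the `first_filtered`/`next_filtered` pair then visits only matching nodes in key order.

/* Select nodes whose weight is at least the threshold passed as filter_ctx. */
static bool
my_filter_node(void *ctx, my_node_t *node) {
    return node->weight >= *(uint64_t *)ctx;
}

static bool
my_filter_subtree(void *ctx, my_node_t *node) {
    /* The summary lets entire subtrees be skipped without visiting them. */
    return node->summary_max_weight >= *(uint64_t *)ctx;
}

static void
my_tree_walk_heavy(my_tree_t *tree, uint64_t threshold) {
    for (my_node_t *n = my_tree_first_filtered(tree, &my_filter_node,
        &my_filter_subtree, &threshold); n != NULL;
        n = my_tree_next_filtered(tree, n, &my_filter_node,
        &my_filter_subtree, &threshold)) {
        /* ... use n ... */
    }
}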
*/\ +static inline a_type * \ +a_prefix##search_with_filter_bounds(a_rbt_type *rbtree, \ + const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx, \ + bool include_subtree, \ + bool nextbound, a_type **nextbound_node, a_type **nextbound_subtree, \ + bool prevbound, a_type **prevbound_node, a_type **prevbound_subtree) {\ + if (nextbound) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = NULL; \ + } \ + if (prevbound) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = NULL; \ + } \ + a_type *tnode = rbtree->rbt_root; \ + while (tnode != NULL && filter_subtree(filter_ctx, tnode)) { \ + int cmp = a_cmp(key, tnode); \ + a_type *tleft = rbtn_left_get(a_type, a_field, tnode); \ + a_type *tright = rbtn_right_get(a_type, a_field, tnode); \ + if (cmp < 0) { \ + if (nextbound) { \ + if (filter_node(filter_ctx, tnode)) { \ + *nextbound_node = tnode; \ + *nextbound_subtree = NULL; \ + } else if (tright != NULL && filter_subtree( \ + filter_ctx, tright)) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = tright; \ + } \ + } \ + tnode = tleft; \ + } else if (cmp > 0) { \ + if (prevbound) { \ + if (filter_node(filter_ctx, tnode)) { \ + *prevbound_node = tnode; \ + *prevbound_subtree = NULL; \ + } else if (tleft != NULL && filter_subtree( \ + filter_ctx, tleft)) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = tleft; \ + } \ + } \ + tnode = tright; \ + } else { \ + if (filter_node(filter_ctx, tnode)) { \ + return tnode; \ + } \ + if (include_subtree) { \ + if (prevbound && tleft != NULL && filter_subtree( \ + filter_ctx, tleft)) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = tleft; \ + } \ + if (nextbound && tright != NULL && filter_subtree( \ + filter_ctx, tright)) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = tright; \ + } \ + } \ + return NULL; \ + } \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *nright = rbtn_right_get(a_type, a_field, node); \ + if (nright != NULL && filter_subtree(filter_ctx, nright)) { \ + return a_prefix##first_filtered_from_node(nright, filter_node, \ + filter_subtree, filter_ctx); \ + } \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *search_result = a_prefix##search_with_filter_bounds( \ + rbtree, node, filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ true, &node_candidate, &subtree_candidate, \ + /* prevbound */ false, NULL, NULL); \ + assert(node == search_result \ + || !filter_node(filter_ctx, node)); \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##first_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *nleft = rbtn_left_get(a_type, a_field, node); \ + if (nleft != NULL && filter_subtree(filter_ctx, nleft)) { \ + return a_prefix##last_filtered_from_node(nleft, filter_node, \ + filter_subtree, filter_ctx); \ + } \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *search_result = a_prefix##search_with_filter_bounds( \ + rbtree, node, filter_node, 
filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ true, &node_candidate, &subtree_candidate); \ + assert(node == search_result \ + || !filter_node(filter_ctx, node)); \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##last_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ false, NULL, NULL); \ + return result; \ +} \ +a_attr a_type * \ +a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ true, \ + /* nextbound */ true, &node_candidate, &subtree_candidate, \ + /* prevbound */ false, NULL, NULL); \ + if (result != NULL) { \ + return result; \ + } \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##first_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ true, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ true, &node_candidate, &subtree_candidate); \ + if (result != NULL) { \ + return result; \ + } \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##last_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse_filtered(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + ret = a_prefix##iter_recurse_filtered(rbtree, left, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + } \ + if (ret != NULL) { \ + return ret; \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ +} \ +a_attr a_type * \ +a_prefix##iter_start_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type 
*(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (!filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + int cmp = a_cmp(start, node); \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (cmp < 0) { \ + ret = a_prefix##iter_start_filtered(rbtree, start, left, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + } else if (cmp > 0) { \ + return a_prefix##iter_start_filtered(rbtree, start, right, \ + cb, arg, filter_node, filter_subtree, filter_ctx); \ + } else { \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start_filtered(rbtree, start, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } else { \ + ret = a_prefix##iter_recurse_filtered(rbtree, rbtree->rbt_root, \ + cb, arg, filter_node, filter_subtree, filter_ctx); \ + } \ + return ret; \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse_filtered(a_rbt_type *rbtree, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + ret = a_prefix##reverse_iter_recurse_filtered(rbtree, right, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + } \ + if (ret != NULL) { \ + return ret; \ + } \ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start_filtered(a_rbt_type *rbtree, a_type *start,\ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (!filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + int cmp = a_cmp(start, node); \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (cmp > 0) { \ + ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \ + right, cb, arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } 
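The `iter_filtered`/`reverse_iter_filtered` family above keeps the existing rb iterator convention: the callback returns NULL to keep going, and any non-NULL pointer both stops the walk and becomes the return value. A sketch built on the assumed `my_filter_*` helpers from earlier, counting at most `limit` matching nodes:

typedef struct {
    size_t seen;
    size_t limit;
} my_count_ctx_t;

static my_node_t *
my_count_cb(my_tree_t *tree, my_node_t *node, void *arg) {
    my_count_ctx_t *ctx = (my_count_ctx_t *)arg;
    ctx->seen++;
    /* Returning non-NULL stops the iteration and is passed back out. */
    return ctx->seen == ctx->limit ? node : NULL;
}

static size_t
my_count_heavy(my_tree_t *tree, uint64_t threshold, size_t limit) {
    my_count_ctx_t ctx = {0, limit};
    (void)my_tree_iter_filtered(tree, NULL, &my_count_cb, &ctx,
        &my_filter_node, &my_filter_subtree, &threshold);
    return ctx.seen;
}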
\ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\ + arg, filter_node, filter_subtree, filter_ctx); \ + } else if (cmp < 0) { \ + return a_prefix##reverse_iter_start_filtered(rbtree, start, \ + left, cb, arg, filter_node, filter_subtree, filter_ctx); \ + } else { \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\ + arg, filter_node, filter_subtree, filter_ctx); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } else { \ + ret = a_prefix##reverse_iter_recurse_filtered(rbtree, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return ret; \ +} \ +) /* end rb_summarized_only */ #endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/test/unit/rb.c b/test/unit/rb.c index a594fb7..7d4c454 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include + #include "jemalloc/internal/rb.h" #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ @@ -13,13 +15,47 @@ } \ } while (0) -typedef struct node_s node_t; +static bool summarize_always_returns_true = false; +typedef struct node_s node_t; struct node_s { #define NODE_MAGIC 0x9823af7e uint32_t magic; rb_node(node_t) link; + /* Order used by nodes. */ uint64_t key; + /* + * Our made-up summary property is "specialness", with summarization + * taking the max. + */ + uint64_t specialness; + + /* + * Used by some of the test randomization to avoid double-removing + * nodes. + */ + bool mid_remove; + + /* + * To test searching functionality, we want to temporarily weaken the + * ordering to allow non-equal nodes that nevertheless compare equal. + */ + bool allow_duplicates; + + /* + * In check_consistency, it's handy to know a node's rank in the tree; + * this tracks it (but only there; not all tests use this). + */ + int rank; + int filtered_rank; + + /* + * Replicate the internal structure of the tree, to make sure the + * implementation doesn't miss any updates. + */ + const node_t *summary_lchild; + const node_t *summary_rchild; + uint64_t summary_max_specialness; }; static int @@ -30,10 +66,12 @@ node_cmp(const node_t *a, const node_t *b) { expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); ret = (a->key > b->key) - (a->key < b->key); - if (ret == 0) { + if (ret == 0 && !a->allow_duplicates) { /* * Duplicates are not allowed in the tree, so force an - * arbitrary ordering for non-identical items with equal keys. + * arbitrary ordering for non-identical items with equal keys, + * unless the user is searching and wants to allow the + * duplicate. 
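The weakened ordering described in this comment is what allows a throwaway key on the stack to match a resident node: with `allow_duplicates` set on the key, equal keys compare equal, whereas the default pointer tie-break would treat the stack key as a distinct element. A hedged sketch of the idiom (the helper name is made up):

static node_t *
lookup_by_key(tree_t *tree, uint64_t k) {
    node_t key;
    key.magic = NODE_MAGIC;
    key.key = k;
    /* Compare equal to any resident node that shares this key. */
    key.allow_duplicates = true;
    return tree_search(tree, &key);
}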
*/ ret = (((uintptr_t)a) > ((uintptr_t)b)) - (((uintptr_t)a) < ((uintptr_t)b)); @@ -41,8 +79,77 @@ node_cmp(const node_t *a, const node_t *b) { return ret; } +static uint64_t +node_subtree_specialness(node_t *n, const node_t *lchild, + const node_t *rchild) { + uint64_t subtree_specialness = n->specialness; + if (lchild != NULL + && lchild->summary_max_specialness > subtree_specialness) { + subtree_specialness = lchild->summary_max_specialness; + } + if (rchild != NULL + && rchild->summary_max_specialness > subtree_specialness) { + subtree_specialness = rchild->summary_max_specialness; + } + return subtree_specialness; +} + +static bool +node_summarize(node_t *a, const node_t *lchild, const node_t *rchild) { + uint64_t new_summary_max_specialness = node_subtree_specialness( + a, lchild, rchild); + bool changed = (a->summary_lchild != lchild) + || (a->summary_rchild != rchild) + || (new_summary_max_specialness != a->summary_max_specialness); + a->summary_max_specialness = new_summary_max_specialness; + a->summary_lchild = lchild; + a->summary_rchild = rchild; + return changed || summarize_always_returns_true; +} + typedef rb_tree(node_t) tree_t; -rb_gen(static, tree_, tree_t, node_t, link, node_cmp); +rb_summarized_proto(static, tree_, tree_t, node_t); +rb_summarized_gen(static, tree_, tree_t, node_t, link, node_cmp, + node_summarize); + +static bool +specialness_filter_node(void *ctx, node_t *node) { + uint64_t specialness = *(uint64_t *)ctx; + return node->specialness >= specialness; +} + +static bool +specialness_filter_subtree(void *ctx, node_t *node) { + uint64_t specialness = *(uint64_t *)ctx; + return node->summary_max_specialness >= specialness; +} + +static node_t * +tree_iterate_cb(tree_t *tree, node_t *node, void *data) { + unsigned *i = (unsigned *)data; + node_t *search_node; + + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + + /* Test rb_search(). */ + search_node = tree_search(tree, node); + expect_ptr_eq(search_node, node, + "tree_search() returned unexpected node"); + + /* Test rb_nsearch(). */ + search_node = tree_nsearch(tree, node); + expect_ptr_eq(search_node, node, + "tree_nsearch() returned unexpected node"); + + /* Test rb_psearch(). 
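One detail of `node_summarize` above that is easy to miss: its boolean result is only an optimization hint for `summarize_range`'s early exit. Always returning true (which the `summarize_always_returns_true` toggle forces in half of the randomized runs) is still correct; every ancestor on the path simply gets recomputed. A conservative variant of the earlier hypothetical callback would be:

static bool
my_node_summarize_conservative(my_node_t *node, const my_node_t *lchild,
    const my_node_t *rchild) {
    (void)my_node_summarize(node, lchild, rchild);
    /* "Maybe changed" is always a safe answer; it only costs extra walking. */
    return true;
}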
*/ + search_node = tree_psearch(tree, node); + expect_ptr_eq(search_node, node, + "tree_psearch() returned unexpected node"); + + (*i)++; + + return NULL; +} TEST_BEGIN(test_rb_empty) { tree_t tree; @@ -65,6 +172,32 @@ TEST_BEGIN(test_rb_empty) { key.key = 0; key.magic = NODE_MAGIC; expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); + + unsigned nodes = 0; + tree_iter_filtered(&tree, NULL, &tree_iterate_cb, + &nodes, &specialness_filter_node, &specialness_filter_subtree, + NULL); + expect_u_eq(0, nodes, ""); + + nodes = 0; + tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, + &nodes, &specialness_filter_node, &specialness_filter_subtree, + NULL); + expect_u_eq(0, nodes, ""); + + expect_ptr_null(tree_first_filtered(&tree, &specialness_filter_node, + &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_last_filtered(&tree, &specialness_filter_node, + &specialness_filter_subtree, NULL), ""); + + key.key = 0; + key.magic = NODE_MAGIC; + expect_ptr_null(tree_search_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_nsearch_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_psearch_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); } TEST_END @@ -81,6 +214,16 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { left_node = rbtn_left_get(node_t, link, node); right_node = rbtn_right_get(node_t, link, node); + expect_ptr_eq(left_node, node->summary_lchild, + "summary missed a tree update"); + expect_ptr_eq(right_node, node->summary_rchild, + "summary missed a tree update"); + + uint64_t expected_subtree_specialness = node_subtree_specialness(node, + left_node, right_node); + expect_u64_eq(expected_subtree_specialness, + node->summary_max_specialness, "Incorrect summary"); + if (!rbtn_red_get(node_t, link, node)) { black_depth++; } @@ -117,33 +260,6 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { return ret; } -static node_t * -tree_iterate_cb(tree_t *tree, node_t *node, void *data) { - unsigned *i = (unsigned *)data; - node_t *search_node; - - expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); - - /* Test rb_search(). */ - search_node = tree_search(tree, node); - expect_ptr_eq(search_node, node, - "tree_search() returned unexpected node"); - - /* Test rb_nsearch(). */ - search_node = tree_nsearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_nsearch() returned unexpected node"); - - /* Test rb_psearch(). */ - search_node = tree_psearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_psearch() returned unexpected node"); - - (*i)++; - - return NULL; -} - static unsigned tree_iterate(tree_t *tree) { unsigned i; @@ -225,9 +341,11 @@ destroy_cb(node_t *node, void *data) { } TEST_BEGIN(test_rb_random) { -#define NNODES 25 -#define NBAGS 250 -#define SEED 42 + enum { + NNODES = 25, + NBAGS = 500, + SEED = 42 + }; sfmt_t *sfmt; uint64_t bag[NNODES]; tree_t tree; @@ -255,12 +373,26 @@ TEST_BEGIN(test_rb_random) { } } + /* + * We alternate test behavior with a period of 2 here, and a + * period of 5 down below, so there's no cycle in which certain + * combinations get omitted. + */ + summarize_always_returns_true = (i % 2 == 0); + for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. 
*/ tree_new(&tree); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; + nodes[k].specialness = gen_rand64_range(sfmt, + NNODES); + nodes[k].mid_remove = false; + nodes[k].allow_duplicates = false; + nodes[k].summary_lchild = NULL; + nodes[k].summary_rchild = NULL; + nodes[k].summary_max_specialness = 0; } /* Insert nodes. */ @@ -341,9 +473,538 @@ TEST_BEGIN(test_rb_random) { } } fini_gen_rand(sfmt); -#undef NNODES -#undef NBAGS -#undef SEED +} +TEST_END + +static void +expect_simple_consistency(tree_t *tree, uint64_t specialness, + bool expected_empty, node_t *expected_first, node_t *expected_last) { + bool empty; + node_t *first; + node_t *last; + + empty = tree_empty_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_b_eq(expected_empty, empty, ""); + + first = tree_first_filtered(tree, + &specialness_filter_node, &specialness_filter_subtree, + (void *)&specialness); + expect_ptr_eq(expected_first, first, ""); + + last = tree_last_filtered(tree, + &specialness_filter_node, &specialness_filter_subtree, + (void *)&specialness); + expect_ptr_eq(expected_last, last, ""); +} + +TEST_BEGIN(test_rb_filter_simple) { + enum {FILTER_NODES = 10}; + node_t nodes[FILTER_NODES]; + for (unsigned i = 0; i < FILTER_NODES; i++) { + nodes[i].magic = NODE_MAGIC; + nodes[i].key = i; + if (i == 0) { + nodes[i].specialness = 0; + } else { + nodes[i].specialness = ffs_u(i); + } + nodes[i].mid_remove = false; + nodes[i].allow_duplicates = false; + nodes[i].summary_lchild = NULL; + nodes[i].summary_rchild = NULL; + nodes[i].summary_max_specialness = 0; + } + + summarize_always_returns_true = false; + + tree_t tree; + tree_new(&tree); + + /* Should be empty */ + expect_simple_consistency(&tree, /* specialness */ 0, /* empty */ true, + /* first */ NULL, /* last */ NULL); + + /* Fill in just the odd nodes. */ + for (int i = 1; i < FILTER_NODES; i += 2) { + tree_insert(&tree, &nodes[i]); + } + + /* A search for an odd node should succeed. */ + expect_simple_consistency(&tree, /* specialness */ 0, /* empty */ false, + /* first */ &nodes[1], /* last */ &nodes[9]); + + /* But a search for an even one should fail. */ + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ true, + /* first */ NULL, /* last */ NULL); + + /* Now we add an even. */ + tree_insert(&tree, &nodes[4]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[4]); + + /* A smaller even, and a larger even. */ + tree_insert(&tree, &nodes[2]); + tree_insert(&tree, &nodes[8]); + + /* + * A first-search (resp. last-search) for an even should switch to the + * lower (higher) one, now that it's been added. + */ + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[2], /* last */ &nodes[8]); + + /* + * If we remove 2, a first-search we should go back to 4, while a + * last-search should remain unchanged. + */ + tree_remove(&tree, &nodes[2]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[8]); + + /* Reinsert 2, then find it again. */ + tree_insert(&tree, &nodes[2]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[2], /* last */ &nodes[8]); + + /* Searching for a multiple of 4 should not have changed. 
*/ + expect_simple_consistency(&tree, /* specialness */ 2, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[8]); + + /* And a multiple of 8 */ + expect_simple_consistency(&tree, /* specialness */ 3, /* empty */ false, + /* first */ &nodes[8], /* last */ &nodes[8]); + + /* But not a multiple of 16 */ + expect_simple_consistency(&tree, /* specialness */ 4, /* empty */ true, + /* first */ NULL, /* last */ NULL); +} +TEST_END + +typedef struct iter_ctx_s iter_ctx_t; +struct iter_ctx_s { + int ncalls; + node_t *last_node; + + int ncalls_max; + bool forward; +}; + +static node_t * +tree_iterate_filtered_cb(tree_t *tree, node_t *node, void *arg) { + iter_ctx_t *ctx = (iter_ctx_t *)arg; + ctx->ncalls++; + expect_u64_ge(node->specialness, 1, + "Should only invoke cb on nodes that pass the filter"); + if (ctx->last_node != NULL) { + if (ctx->forward) { + expect_d_lt(node_cmp(ctx->last_node, node), 0, + "Incorrect iteration order"); + } else { + expect_d_gt(node_cmp(ctx->last_node, node), 0, + "Incorrect iteration order"); + } + } + ctx->last_node = node; + if (ctx->ncalls == ctx->ncalls_max) { + return node; + } + return NULL; +} + +static int +qsort_node_cmp(const void *ap, const void *bp) { + node_t *a = *(node_t **)ap; + node_t *b = *(node_t **)bp; + return node_cmp(a, b); +} + +#define UPDATE_TEST_MAX 100 +static void +check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { + uint64_t specialness = 1; + + bool empty; + bool real_empty = true; + node_t *first; + node_t *real_first = NULL; + node_t *last; + node_t *real_last = NULL; + for (int i = 0; i < nnodes; i++) { + if (nodes[i].specialness >= specialness) { + real_empty = false; + if (real_first == NULL + || node_cmp(&nodes[i], real_first) < 0) { + real_first = &nodes[i]; + } + if (real_last == NULL + || node_cmp(&nodes[i], real_last) > 0) { + real_last = &nodes[i]; + } + } + } + + empty = tree_empty_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_b_eq(real_empty, empty, ""); + + first = tree_first_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(real_first, first, ""); + + last = tree_last_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(real_last, last, ""); + + for (int i = 0; i < nnodes; i++) { + node_t *next_filtered; + node_t *real_next_filtered = NULL; + node_t *prev_filtered; + node_t *real_prev_filtered = NULL; + for (int j = 0; j < nnodes; j++) { + if (nodes[j].specialness < specialness) { + continue; + } + if (node_cmp(&nodes[j], &nodes[i]) < 0 + && (real_prev_filtered == NULL + || node_cmp(&nodes[j], real_prev_filtered) > 0)) { + real_prev_filtered = &nodes[j]; + } + if (node_cmp(&nodes[j], &nodes[i]) > 0 + && (real_next_filtered == NULL + || node_cmp(&nodes[j], real_next_filtered) < 0)) { + real_next_filtered = &nodes[j]; + } + } + next_filtered = tree_next_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_next_filtered, next_filtered, ""); + + prev_filtered = tree_prev_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_prev_filtered, prev_filtered, ""); + + node_t *search_filtered; + node_t *real_search_filtered; + node_t *nsearch_filtered; + node_t *real_nsearch_filtered; + node_t *psearch_filtered; + node_t *real_psearch_filtered; + + /* + * search, nsearch, psearch from a node before nodes[i] in the 
+ * ordering. + */ + node_t before; + before.magic = NODE_MAGIC; + before.key = nodes[i].key - 1; + before.allow_duplicates = false; + real_search_filtered = NULL; + search_filtered = tree_search_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = real_prev_filtered; + psearch_filtered = tree_psearch_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* search, nsearch, psearch from nodes[i] */ + real_search_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : NULL); + search_filtered = tree_search_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* + * search, nsearch, psearch from a node equivalent to but + * distinct from nodes[i]. + */ + node_t equiv; + equiv.magic = NODE_MAGIC; + equiv.key = nodes[i].key; + equiv.allow_duplicates = true; + real_search_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : NULL); + search_filtered = tree_search_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* + * search, nsearch, psearch from a node after nodes[i] in the + * ordering. 
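For readers less familiar with the rb API being exercised here: `nsearch` returns the least node greater than or equal to the key and `psearch` the greatest node less than or equal to it, which is what the `real_nsearch_filtered`/`real_psearch_filtered` expectations above encode; the `*_filtered` variants behave the same way restricted to nodes passing the filter. A concrete (hypothetical) instance:

/*
 * With nodes keyed {10, 20, 30} resident and a stack key of 25:
 *   tree_search(&tree, &key)  -> NULL            (no exact match)
 *   tree_nsearch(&tree, &key) -> the key-30 node (least node >= 25)
 *   tree_psearch(&tree, &key) -> the key-20 node (greatest node <= 25)
 */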
+ */ + node_t after; + after.magic = NODE_MAGIC; + after.key = nodes[i].key + 1; + after.allow_duplicates = false; + real_search_filtered = NULL; + search_filtered = tree_search_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = real_next_filtered; + nsearch_filtered = tree_nsearch_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + } + + /* Filtered iteration test setup. */ + int nspecial = 0; + node_t *sorted_nodes[UPDATE_TEST_MAX]; + node_t *sorted_filtered_nodes[UPDATE_TEST_MAX]; + for (int i = 0; i < nnodes; i++) { + sorted_nodes[i] = &nodes[i]; + } + qsort(sorted_nodes, nnodes, sizeof(node_t *), &qsort_node_cmp); + for (int i = 0; i < nnodes; i++) { + sorted_nodes[i]->rank = i; + sorted_nodes[i]->filtered_rank = nspecial; + if (sorted_nodes[i]->specialness >= 1) { + sorted_filtered_nodes[nspecial] = sorted_nodes[i]; + nspecial++; + } + } + + node_t *iter_result; + + iter_ctx_t ctx; + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = INT_MAX; + ctx.forward = true; + + /* Filtered forward iteration from the beginning. */ + iter_result = tree_iter_filtered(tree, NULL, &tree_iterate_filtered_cb, + &ctx, &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial, ctx.ncalls, ""); + /* Filtered forward iteration from a starting point. */ + for (int i = 0; i < nnodes; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + iter_result = tree_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial - nodes[i].filtered_rank, ctx.ncalls, ""); + } + /* Filtered forward iteration from the beginning, with stopping */ + for (int i = 0; i < nspecial; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = i + 1; + iter_result = tree_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(sorted_filtered_nodes[i], iter_result, ""); + expect_d_eq(ctx.ncalls, i + 1, ""); + } + /* Filtered forward iteration from a starting point, with stopping. */ + for (int i = 0; i < nnodes; i++) { + for (int j = 0; j < nspecial - nodes[i].filtered_rank; j++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = j + 1; + iter_result = tree_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, + &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_d_eq(j + 1, ctx.ncalls, ""); + expect_ptr_eq(sorted_filtered_nodes[ + nodes[i].filtered_rank + j], iter_result, ""); + } + } + + /* Backwards iteration. */ + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = INT_MAX; + ctx.forward = false; + + /* Filtered backward iteration from the end. 
*/ + iter_result = tree_reverse_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial, ctx.ncalls, ""); + /* Filtered backward iteration from a starting point. */ + for (int i = 0; i < nnodes; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + iter_result = tree_reverse_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0); + expect_d_eq(nodes[i].filtered_rank + surplus_rank, ctx.ncalls, + ""); + } + /* Filtered backward iteration from the end, with stopping */ + for (int i = 0; i < nspecial; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = i + 1; + iter_result = tree_reverse_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(sorted_filtered_nodes[nspecial - i - 1], + iter_result, ""); + expect_d_eq(ctx.ncalls, i + 1, ""); + } + /* Filtered backward iteration from a starting point, with stopping. */ + for (int i = 0; i < nnodes; i++) { + int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0); + for (int j = 0; j < nodes[i].filtered_rank + surplus_rank; + j++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = j + 1; + iter_result = tree_reverse_iter_filtered(tree, + &nodes[i], &tree_iterate_filtered_cb, &ctx, + &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_d_eq(j + 1, ctx.ncalls, ""); + expect_ptr_eq(sorted_filtered_nodes[ + nodes[i].filtered_rank - j - 1 + surplus_rank], + iter_result, ""); + } + } +} + +static void +do_update_search_test(int nnodes, int ntrees, int nremovals, + int nupdates) { + node_t nodes[UPDATE_TEST_MAX]; + assert(nnodes <= UPDATE_TEST_MAX); + + sfmt_t *sfmt = init_gen_rand(12345); + for (int i = 0; i < ntrees; i++) { + tree_t tree; + tree_new(&tree); + for (int j = 0; j < nnodes; j++) { + nodes[j].magic = NODE_MAGIC; + /* + * In consistency checking, we increment or decrement a + * key and assume that the result is not a key in the + * tree. This isn't a *real* concern with 64-bit keys + * and a good PRNG, but why not be correct anyways? 
+ */ + nodes[j].key = 2 * gen_rand64(sfmt); + nodes[j].specialness = 0; + nodes[j].mid_remove = false; + nodes[j].allow_duplicates = false; + nodes[j].summary_lchild = NULL; + nodes[j].summary_rchild = NULL; + nodes[j].summary_max_specialness = 0; + tree_insert(&tree, &nodes[j]); + } + for (int j = 0; j < nremovals; j++) { + int victim = (int)gen_rand64_range(sfmt, nnodes); + if (!nodes[victim].mid_remove) { + tree_remove(&tree, &nodes[victim]); + nodes[victim].mid_remove = true; + } + } + for (int j = 0; j < nnodes; j++) { + if (nodes[j].mid_remove) { + nodes[j].mid_remove = false; + nodes[j].key = 2 * gen_rand64(sfmt); + tree_insert(&tree, &nodes[j]); + } + } + for (int i = 0; i < nupdates; i++) { + uint32_t ind = gen_rand32_range(sfmt, nnodes); + nodes[ind].specialness = 1 - nodes[ind].specialness; + tree_update_summaries(&tree, &nodes[ind]); + check_consistency(&tree, nodes, nnodes); + } + } +} + +TEST_BEGIN(test_rb_update_search) { + summarize_always_returns_true = false; + do_update_search_test(2, 100, 3, 50); + do_update_search_test(5, 100, 3, 50); + do_update_search_test(12, 100, 5, 1000); + do_update_search_test(100, 1, 50, 500); +} +TEST_END + +typedef rb_tree(node_t) unsummarized_tree_t; +rb_gen(static UNUSED, unsummarized_tree_, unsummarized_tree_t, node_t, link, + node_cmp); + +static node_t * +unsummarized_tree_iterate_cb(unsummarized_tree_t *tree, node_t *node, + void *data) { + unsigned *i = (unsigned *)data; + (*i)++; + return NULL; +} +/* + * The unsummarized and summarized funtionality is implemented via the same + * functions; we don't really need to do much more than test that we can exclude + * the filtered functionality without anything breaking. + */ +TEST_BEGIN(test_rb_unsummarized) { + unsummarized_tree_t tree; + unsummarized_tree_new(&tree); + unsigned nnodes = 0; + unsummarized_tree_iter(&tree, NULL, &unsummarized_tree_iterate_cb, + &nnodes); + expect_u_eq(0, nnodes, ""); } TEST_END @@ -351,5 +1012,8 @@ int main(void) { return test_no_reentrancy( test_rb_empty, - test_rb_random); + test_rb_random, + test_rb_filter_simple, + test_rb_update_search, + test_rb_unsummarized); } -- cgit v0.12 From 08089589f74ac23268791be18742d031cc5dd041 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 12 May 2021 16:00:38 -0700 Subject: Fix an interaction between the oversize_threshold test and bgthds. Also added the shared utility to check if background_thread is enabled. --- test/include/test/bgthd.h | 17 +++++++++++++++++ test/include/test/jemalloc_test.h.in | 1 + test/integration/extent.c | 16 ++-------------- test/unit/arena_decay.c | 22 +++++----------------- test/unit/oversize_threshold.c | 6 ++++-- test/unit/stats.c | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) create mode 100644 test/include/test/bgthd.h diff --git a/test/include/test/bgthd.h b/test/include/test/bgthd.h new file mode 100644 index 0000000..4fa2395 --- /dev/null +++ b/test/include/test/bgthd.h @@ -0,0 +1,17 @@ +/* + * Shared utility for checking if background_thread is enabled, which affects + * the purging behavior and assumptions in some tests. 
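A note on the mallctl this new helper reads (the helper body follows below): "background_thread" is a readable and writable control, so the same name the tests query here is what turns asynchronous purging on at runtime (or `MALLOC_CONF=background_thread:true` at startup). The write is not part of this patch; the sketch below just shows the knob being toggled, assuming the usual test harness headers are in scope.

static void
enable_background_purging(void) {
    bool enable = true;
    /* Same mallctl name that is_background_thread_enabled() reads. */
    int err = mallctl("background_thread", NULL, NULL, (void *)&enable,
        sizeof(enable));
    assert_d_eq(err, 0, "Unexpected mallctl error");
}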
+ */ + +static inline bool +is_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + + return enabled; +} diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index ae67574..0e33216 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -128,6 +128,7 @@ static const bool config_debug = #include "test/test.h" #include "test/timer.h" #include "test/thd.h" +#include "test/bgthd.h" #define MEXP 19937 #include "test/SFMT.h" diff --git a/test/integration/extent.c b/test/integration/extent.c index ccc314d..831ef63 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,18 +2,6 @@ #include "test/extent_hooks.h" -static bool -check_background_thread_enabled(void) { - bool enabled; - size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); - if (ret == ENOENT) { - return false; - } - expect_d_eq(ret, 0, "Unexpected mallctl error"); - return enabled; -} - static void test_extent_body(unsigned arena_ind) { void *p; @@ -177,7 +165,7 @@ test_manual_hook_body(void) { expect_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - if (!check_background_thread_enabled()) { + if (!is_background_thread_enabled()) { test_extent_body(arena_ind); } @@ -235,7 +223,7 @@ TEST_BEGIN(test_extent_auto_hook) { expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_extent_body(arena_ind); } TEST_END diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index cea39e0..9fca538 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -10,18 +10,6 @@ static nstime_t time_mock; static bool monotonic_mock; static bool -check_background_thread_enabled(void) { - bool enabled; - size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); - if (ret == ENOENT) { - return false; - } - expect_d_eq(ret, 0, "Unexpected mallctl error"); - return enabled; -} - -static bool nstime_monotonic_mock(void) { return monotonic_mock; } @@ -184,7 +172,7 @@ generate_dirty(unsigned arena_ind, size_t size) { } TEST_BEGIN(test_decay_ticks) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); ticker_geom_t *decay_ticker; @@ -417,7 +405,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, } TEST_BEGIN(test_decay_ticker) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; @@ -476,7 +464,7 @@ TEST_BEGIN(test_decay_ticker) { TEST_END TEST_BEGIN(test_decay_nonmonotonic) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); @@ -534,7 +522,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { TEST_END TEST_BEGIN(test_decay_now) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(0, 0); @@ -555,7 +543,7 
@@ TEST_BEGIN(test_decay_now) { TEST_END TEST_BEGIN(test_decay_never) { - test_skip_if(check_background_thread_enabled() || !config_stats); + test_skip_if(is_background_thread_enabled() || !config_stats); test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index e374b14..44a8f76 100644 --- a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -106,14 +106,16 @@ TEST_BEGIN(test_oversize_threshold) { /* Allocating and freeing half a megabyte should leave them dirty. */ void *ptr = mallocx(512 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); - expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + if (!is_background_thread_enabled()) { + expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + } /* Purge again to reset everything out. */ arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); max_purged = 0; /* - * Allocating and freeing 2 megabytes should leave them dirty because of + * Allocating and freeing 2 megabytes should have them purged because of * the oversize threshold. */ ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); diff --git a/test/unit/stats.c b/test/unit/stats.c index 6b6594d..cb99b09 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -119,7 +119,7 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - if (!background_thread_enabled() && !opt_hpa) { + if (!is_background_thread_enabled() && !opt_hpa) { expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } -- cgit v0.12 From 11beab38bc5ede45f06af3c513efd003c9d32088 Mon Sep 17 00:00:00 2001 From: Deanna Gelbart Date: Tue, 11 May 2021 19:02:33 -0700 Subject: Added --debug-syms-by-id option --- bin/jeprof.in | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index d47359c..e0b212a 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -240,6 +240,7 @@ Miscellaneous: --test Run unit tests --help This message --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name Environment Variables: JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof @@ -365,6 +366,7 @@ sub Init() { $main::opt_tools = ""; $main::opt_debug = 0; $main::opt_test = 0; + $main::opt_debug_syms_by_id = 0; # These are undocumented flags used only by unittests. $main::opt_test_stride = 0; @@ -433,6 +435,7 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, + "debug-syms-by-id!" => \$main::opt_debug_syms_by_id, # Undocumented flags used only by unittests: "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); @@ -577,6 +580,11 @@ sub Init() { foreach (@prefix_list) { s|/+$||; } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). 
+ $main::gave_up_on_elfutils = 0; } sub FilterAndPrint { @@ -4492,16 +4500,54 @@ sub FindLibrary { # For libc libraries, the copy in /usr/lib/debug contains debugging symbols sub DebuggingLibrary { my $file = shift; - if ($file =~ m|^/|) { - if (-f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; - } elsif (-f "/usr/lib/debug$file.debug") { - return "/usr/lib/debug$file.debug"; + + if ($file !~ m|^/|) { + return undef; + } + + # Find debug symbol file if it's named after the library's name. + + if (-f "/usr/lib/debug$file") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } + return "/usr/lib/debug$file.debug"; + } + + if(!$main::opt_debug_syms_by_id) { + if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" }; + return undef; + } + + # Find debug file if it's named after the library's build ID. + + my $readelf = ''; + if (!$main::gave_up_on_elfutils) { + $readelf = qx/eu-readelf -n ${file}/; + if ($?) { + print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n"; + $main::gave_up_on_elfutils = 1; + return undef; + } + my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s; + if (defined $buildID && length $buildID > 0) { + my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug'; + if (-e $symbolFile) { + if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" }; + return $symbolFile; + } else { + if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" }; + return undef; } + } } + + if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" }; return undef; } + # Parse text section header of a library using objdump sub ParseTextSectionHeaderFromObjdump { my $lib = shift; -- cgit v0.12 From 36c6bfb963e8a36a8918eb841902e006466fb7c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 May 2021 14:52:46 -0700 Subject: SEC: Allow arbitrarily many shards, cached sizes. --- include/jemalloc/internal/pa.h | 4 ++-- include/jemalloc/internal/sec.h | 22 +++++------------- src/arena.c | 2 +- src/jemalloc.c | 2 +- src/pa.c | 7 +++--- src/sec.c | 50 ++++++++++++++++++++++++++++------------- test/unit/sec.c | 13 +++++++++-- 7 files changed, 59 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index acb94eb..cb9f8cf 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,8 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts); +bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. 
diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index ddcdfbd..fa86338 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -14,20 +14,6 @@ */ /* - * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup - * of 4, this allows caching of sizes up to 128k. - */ -#define SEC_NPSIZES 16 -/* - * For now, we put a cap on the number of SECs an arena can have. There's no - * reason it can't be dynamic; it's just inconvenient. This number of shards - * are embedded in the arenas, so there's a space / configurability tradeoff - * here. Eventually, we should probably dynamically allocate only however many - * we require. - */ -#define SEC_NSHARDS_MAX 8 - -/* * For now, this is just one field; eventually, we'll probably want to get more * fine-grained data out (like per-size class statistics). */ @@ -91,7 +77,7 @@ struct sec_shard_s { * hooks are installed. */ bool enabled; - sec_bin_t bins[SEC_NPSIZES]; + sec_bin_t *bins; /* Number of bytes in all bins in the shard. */ size_t bytes_cur; /* The next pszind to flush in the flush-some pathways. */ @@ -104,10 +90,12 @@ struct sec_s { pai_t *fallback; sec_opts_t opts; - sec_shard_t shards[SEC_NSHARDS_MAX]; + sec_shard_t *shards; + pszind_t npsizes; }; -bool sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts); +bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, + const sec_opts_t *opts); void sec_flush(tsdn_t *tsdn, sec_t *sec); void sec_disable(tsdn_t *tsdn, sec_t *sec); diff --git a/src/arena.c b/src/arena.c index 78ea92c..3ff9157 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1565,7 +1565,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &opt_hpa_opts, + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &opt_hpa_opts, &opt_hpa_sec_opts)) { goto label_error; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 613733f..1f48993 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1781,7 +1781,7 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, + if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, &opt_hpa_opts, &opt_hpa_sec_opts)) { return true; } diff --git a/src/pa.c b/src/pa.c index 90809b3..cb3b3df 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,13 +49,14 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts) { +pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } - if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, hpa_sec_opts)) { + if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, + hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; diff --git a/src/sec.c b/src/sec.c index c37cf35..4175346 100644 --- a/src/sec.c +++ b/src/sec.c @@ -19,35 +19,55 @@ sec_bin_init(sec_bin_t *bin) { } bool -sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts) { - size_t nshards_clipped = opts->nshards; - if (nshards_clipped > SEC_NSHARDS_MAX) { - nshards_clipped = SEC_NSHARDS_MAX; +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, + const sec_opts_t *opts) { + size_t max_alloc = opts->max_alloc & PAGE_MASK; + pszind_t npsizes = sz_psz2ind(max_alloc); + if (sz_pind2sz(npsizes) > opts->max_alloc) { + npsizes--; } - for (size_t i = 0; i < nshards_clipped; i++) { - sec_shard_t *shard = &sec->shards[i]; + size_t sz_shards = opts->nshards * sizeof(sec_shard_t); + size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); + size_t sz_alloc = sz_shards + sz_bins; + void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + if (dynalloc == NULL) { + return true; + } + sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; + sec->shards = shard_cur; + sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; + /* Just for asserts, below. */ + sec_bin_t *bin_start = bin_cur; + + for (size_t i = 0; i < opts->nshards; i++) { + sec_shard_t *shard = shard_cur; + shard_cur++; bool err = malloc_mutex_init(&shard->mtx, "sec_shard", WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); if (err) { return true; } shard->enabled = true; - for (pszind_t j = 0; j < SEC_NPSIZES; j++) { + shard->bins = bin_cur; + for (pszind_t j = 0; j < npsizes; j++) { sec_bin_init(&shard->bins[j]); + bin_cur++; } shard->bytes_cur = 0; shard->to_flush_next = 0; } + /* + * Should have exactly matched the bin_start to the first unused byte + * after the shards. + */ + assert((void *)shard_cur == (void *)bin_start); + /* And the last bin to use up the last bytes of the allocation. 
*/ + assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); sec->fallback = fallback; - size_t max_alloc_clipped = opts->max_alloc; - if (max_alloc_clipped > sz_pind2sz(SEC_NPSIZES - 1)) { - max_alloc_clipped = sz_pind2sz(SEC_NPSIZES - 1); - } sec->opts = *opts; - sec->opts.nshards = nshards_clipped; - sec->opts.max_alloc = max_alloc_clipped; + sec->npsizes = npsizes; /* * Initialize these last so that an improper use of an SEC whose @@ -106,7 +126,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { /* Update our victim-picking state. */ shard->to_flush_next++; - if (shard->to_flush_next == SEC_NPSIZES) { + if (shard->to_flush_next == sec->npsizes) { shard->to_flush_next = 0; } @@ -249,7 +269,7 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { shard->bytes_cur = 0; edata_list_active_t to_flush; edata_list_active_init(&to_flush); - for (pszind_t i = 0; i < SEC_NPSIZES; i++) { + for (pszind_t i = 0; i < sec->npsizes; i++) { sec_bin_t *bin = &shard->bins[i]; bin->bytes_cur = 0; edata_list_active_concat(&to_flush, &bin->freelist); diff --git a/test/unit/sec.c b/test/unit/sec.c index 36ae1a5..01455c8 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -37,7 +37,14 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, opts.bytes_after_flush = max_bytes / 2; opts.batch_fill_extra = 4; - bool err = sec_init(sec, fallback, &opts); + /* + * We end up leaking this base, but that's fine; this test is + * short-running, and SECs are arena-scoped in reality. + */ + base_t *base = base_new(TSDN_NULL, /* ind */ 123, + &ehooks_default_extent_hooks); + + bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); } @@ -412,10 +419,12 @@ TEST_BEGIN(test_nshards_0) { sec_t sec; /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; + base_t *base = base_new(TSDN_NULL, /* ind */ 123, + &ehooks_default_extent_hooks); sec_opts_t opts = SEC_OPTS_DEFAULT; opts.nshards = 0; - sec_init(&sec, &ta.pai, &opts); + sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); -- cgit v0.12 From 2c0f4c2ac3b6a78a849526be384a7a2349d1a09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 25 May 2021 09:19:40 +0200 Subject: Fix typo in configure.ac: experimetal -> experimental --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 41a03d2..0748329 100644 --- a/configure.ac +++ b/configure.ac @@ -2587,7 +2587,7 @@ AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) -AC_MSG_RESULT([experimetal_smallocx : ${enable_experimental_smallocx}]) +AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) -- cgit v0.12 From 2381efab5754d13da5104b101b1e695afb442590 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 21 May 2021 07:28:16 -0700 Subject: ARC: add Minimum allocation alignment Signed-off-by: Vineet Gupta --- include/jemalloc/internal/quantum.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 11e870a..760d6ad 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -65,6 +65,9 @@ # ifdef __le32__ # define LG_QUANTUM 4 # endif +# ifdef __arc__ +# define LG_QUANTUM 3 +# endif # ifndef LG_QUANTUM # error "Unknown minimum alignment for architecture; specify via " "--with-lg-quantum" -- cgit v0.12 From 4fb93a18ee56795fab725c23cc0211b0198dda46 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 19 Jun 2021 13:38:44 +0100 Subject: extent_can_acquire_neighbor typo fix --- include/jemalloc/internal/extent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index b39e5ed..03eebdd 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -83,7 +83,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, bool neighbor_is_head = contents.metadata.is_head; if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), neighbor_is_head, forward)) { - return NULL; + return false; } extent_state_t neighbor_state = contents.metadata.state; if (pai == EXTENT_PAI_PAC) { -- cgit v0.12 From 0689448b1e8c8c5ae2d1c216f86c88d22a124166 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 21 Jun 2021 14:07:10 -0700 Subject: Travis: Unbreak the builds. In the hopes of future-proofing as much as possible, jump to the latest distribution Travis supports. 
--- .travis.yml | 119 ++++++++++++++--------------------------------- configure.ac | 1 + scripts/gen_travis.py | 59 +++++++++++------------ src/stats.c | 2 +- test/include/test/test.h | 2 - test/src/test.c | 13 ------ test/unit/fb.c | 4 +- test/unit/log.c | 2 +- 8 files changed, 69 insertions(+), 133 deletions(-) diff --git a/.travis.yml b/.travis.yml index b61627b..6aea058 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: generic -dist: precise +dist: focal matrix: include: @@ -8,23 +8,20 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 addons: &gcc_multilib apt: packages: - gcc-multilib + - g++-multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 @@ -58,132 +55,92 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 addons: *gcc_multilib - env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 addons: *gcc_multilib @@ -366,14 +323,6 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - # Valgrind - - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" - addons: - apt: - packages: - - valgrind - before_script: - autoconf diff --git a/configure.ac b/configure.ac index 0748329..1783800 100644 --- a/configure.ac +++ b/configure.ac @@ -258,6 +258,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wno-missing-braces]) dnl This one too. JE_CFLAGS_ADD([-Wno-missing-field-initializers]) + JE_CFLAGS_ADD([-Wno-missing-attributes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 6832f91..992bf00 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from itertools import combinations travis_template = """\ language: generic -dist: precise +dist: focal matrix: include: @@ -30,7 +30,6 @@ script: # travis though, we don't test all 2**7 = 128 possible combinations of these; # instead, we only test combinations of up to 2 'unusual' settings, under the # hope that bugs involving interactions of such settings are rare. -# Things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 MAX_UNUSUAL_OPTIONS = 2 os_default = 'linux' @@ -41,7 +40,6 @@ arch_unusual = 'ppc64le' compilers_default = 'CC=gcc CXX=g++' compilers_unusual = 'CC=clang CXX=clang++' -compilers_ppc_default = 'CC=gcc-8 CXX=g++-8' compiler_flag_unusuals = ['-m32'] @@ -67,7 +65,7 @@ all_unusuals = ( ) unusual_combinations_to_test = [] -for i in xrange(MAX_UNUSUAL_OPTIONS + 1): +for i in range(MAX_UNUSUAL_OPTIONS + 1): unusual_combinations_to_test += combinations(all_unusuals, i) gcc_multilib_set = False @@ -117,24 +115,24 @@ def format_job(combination): job += ' apt:\n' job += ' packages:\n' job += ' - gcc-multilib\n' + job += ' - g++-multilib\n' gcc_multilib_set = True - if arch == 'ppc64le': - job += ' addons:' - if gcc_ppc_set: - job += ' *gcc_ppc\n' - else: - job += ' &gcc_ppc\n' - job += ' apt:\n' - job += ' packages:\n' - job += ' - g++-8\n' - # Compilers overwritten for PPC64LE to gcc-8 - compilers = compilers_ppc_default - # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if 'clang' in compilers or os == 'osx': + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == 'osx': + extra_cflags += [ + '-Wno-deprecated-declarations', + ] env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( - compilers, " ".join(compiler_flags), " ".join(configure_flags)) + 'EXTRA_CFLAGS="{}"'.format( + compilers, ' '.join(compiler_flags), ' '.join(configure_flags), + ' '.join(extra_cflags))) job += ' env: %s\n' % env_string return job @@ -157,16 +155,19 @@ include_rows += '''\ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" ''' +# Does not seem to be working on newer travis machines. Valgrind has long been a +# pain point; abandon it for now. # Valgrind build bots -include_rows += ''' - # Valgrind - - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" - addons: - apt: - packages: - - valgrind -''' +#include_rows += ''' +# # Valgrind +# - os: linux +# arch: amd64 +# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" +# addons: +# apt: +# packages: +# - valgrind +#''' # To enable valgrind on macosx add: # @@ -176,4 +177,4 @@ include_rows += ''' # # It currently fails due to: https://github.com/jemalloc/jemalloc/issues/1274 -print travis_template % include_rows +print(travis_template % include_rows) diff --git a/src/stats.c b/src/stats.c index ef17303..2e8c451 100644 --- a/src/stats.c +++ b/src/stats.c @@ -947,7 +947,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; - for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { + for (pszind_t j = 0; j < PSSET_NPSIZES && j < SC_NPSIZES; j++) { stats_arenas_mib[5] = j; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", diff --git a/test/include/test/test.h b/test/include/test/test.h index 2167e8c..d4b6591 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -581,5 +581,3 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); - -void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index 4583e55..f97ce4d 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -232,16 +232,3 @@ p_test_fail(const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } - -void -strncpy_cond(void *dst, const char *src, bool cond) { - if (cond) { - /* - * Avoid strcpy and explicitly set length to 0 because the - * `stringop-overflow` check may warn even if the specific test - * is unreachable. - */ - size_t n = cond ? 
strlen(src) + 1 : 0; - strncpy(dst, src, n); - } -} diff --git a/test/unit/fb.c b/test/unit/fb.c index d5126f6..ad72c75 100644 --- a/test/unit/fb.c +++ b/test/unit/fb.c @@ -473,8 +473,8 @@ static void expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { bool iter_res; - size_t iter_begin; - size_t iter_len; + size_t iter_begin JEMALLOC_CC_SILENCE_INIT(0); + size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); if (val) { if (forward) { iter_res = fb_srange_iter(fb, nbits, pos, diff --git a/test/unit/log.c b/test/unit/log.c index 02e6a6a..c09b589 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -4,7 +4,7 @@ static void update_log_var_names(const char *names) { - strncpy_cond(log_var_names, names, config_log); + strncpy(log_var_names, names, sizeof(log_var_names)); } static void -- cgit v0.12 From 4452a4812ff8bc2a5127a9b220de05999a0652f1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 21 Jun 2021 13:40:30 -0700 Subject: Add opt.experimental_infallible_new. This allows a guarantee that operator new never throws. Fix the .gitignore rules to include test/integration/cpp while we're here. --- .gitignore | 1 + Makefile.in | 4 +- configure.ac | 3 ++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 ++ .../jemalloc/internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/jemalloc_preamble.h.in | 9 ++++ src/jemalloc.c | 7 +++ src/jemalloc_cpp.cpp | 7 ++- src/stats.c | 1 + test/integration/cpp/basic.cpp | 1 - test/integration/cpp/infallible_new_false.cpp | 23 ++++++++ test/integration/cpp/infallible_new_false.sh | 8 +++ test/integration/cpp/infallible_new_true.cpp | 61 ++++++++++++++++++++++ test/integration/cpp/infallible_new_true.sh | 8 +++ 14 files changed, 134 insertions(+), 3 deletions(-) create mode 100644 test/integration/cpp/infallible_new_false.cpp create mode 100644 test/integration/cpp/infallible_new_false.sh create mode 100644 test/integration/cpp/infallible_new_true.cpp create mode 100644 test/integration/cpp/infallible_new_true.sh diff --git a/.gitignore b/.gitignore index 0c3c040..1c0b338 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ test/include/test/jemalloc_test.h test/include/test/jemalloc_test_defs.h /test/integration/[A-Za-z]* +!/test/integration/cpp/ !/test/integration/[A-Za-z]*.* /test/integration/*.[od] /test/integration/*.out diff --git a/Makefile.in b/Makefile.in index 130fa1e..c36b818 100644 --- a/Makefile.in +++ b/Makefile.in @@ -309,7 +309,9 @@ TESTS_INTEGRATION += \ endif ifeq (@enable_cxx@, 1) CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp -TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp +TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp \ + $(srcroot)test/integration/cpp/infallible_new_true.cpp \ + $(srcroot)test/integration/cpp/infallible_new_false.cpp else CPP_SRCS := TESTS_INTEGRATION_CPP := diff --git a/configure.ac b/configure.ac index 1783800..5eb4d46 100644 --- a/configure.ac +++ b/configure.ac @@ -324,6 +324,9 @@ if test "x$enable_cxx" = "x1" ; then enable_cxx="0" fi fi +if test "x$enable_cxx" = "x1"; then + AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ]) +fi AC_SUBST([enable_cxx]) AC_SUBST([CONFIGURE_CXXFLAGS]) AC_SUBST([SPECIFIED_CXXFLAGS]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 093c8be..78d1213 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -401,6 +401,9 @@ /* Performs additional safety checks 
when defined. */ #undef JEMALLOC_OPT_SAFETY_CHECKS +/* Is C++ support being built? */ +#undef JEMALLOC_ENABLE_CXX + /* Performs additional size checks when defined. */ #undef JEMALLOC_OPT_SIZE_CHECKS diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index de5731f..af6dc0a 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -26,6 +26,7 @@ extern void (*junk_free_callback)(void *ptr, size_t size); extern void (*junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool opt_xmalloc; +extern bool opt_experimental_infallible_new; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ef1cbae..f5d83a6 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -198,6 +198,15 @@ static const bool config_opt_size_checks = #endif ; +/* Whether or not the C++ extensions are enabled. */ +static const bool config_enable_cxx = +#ifdef JEMALLOC_ENABLE_CXX + true +#else + false +#endif +; + #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA diff --git a/src/jemalloc.c b/src/jemalloc.c index 1f48993..c70244d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -141,6 +141,7 @@ void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; bool opt_utrace = false; bool opt_xmalloc = false; +bool opt_experimental_infallible_new = false; bool opt_zero = false; unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -1307,6 +1308,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } + if (config_enable_cxx) { + CONF_HANDLE_BOOL( + opt_experimental_infallible_new, + "experimental_infallible_new") + } + CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 47ba92a..451655f 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -56,6 +56,12 @@ void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcep JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { + if (opt_experimental_infallible_new) { + safety_check_fail(": Allocation failed and " + "opt.experimental_infallible_new is true. 
Aborting.\n"); + return nullptr; + } + void *ptr = nullptr; while (ptr == nullptr) { @@ -93,7 +99,6 @@ fallback_impl(std::size_t size) noexcept(IsNoExcept) { if (likely(ptr != nullptr)) { return ptr; } - return handleOOM(size, IsNoExcept); } diff --git a/src/stats.c b/src/stats.c index 2e8c451..34cae0a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1501,6 +1501,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("zero") OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index b48ec8a..c1cf6cd 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -1,4 +1,3 @@ -#include #include "test/jemalloc_test.h" TEST_BEGIN(test_basic) { diff --git a/test/integration/cpp/infallible_new_false.cpp b/test/integration/cpp/infallible_new_false.cpp new file mode 100644 index 0000000..42196d6 --- /dev/null +++ b/test/integration/cpp/infallible_new_false.cpp @@ -0,0 +1,23 @@ +#include + +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_failing_alloc) { + bool saw_exception = false; + try { + /* Too big of an allocation to succeed. */ + void *volatile ptr = ::operator new((size_t)-1); + (void)ptr; + } catch (...) { + saw_exception = true; + } + expect_true(saw_exception, "Didn't get a failure"); +} +TEST_END + +int +main(void) { + return test( + test_failing_alloc); +} + diff --git a/test/integration/cpp/infallible_new_false.sh b/test/integration/cpp/infallible_new_false.sh new file mode 100644 index 0000000..7d41812 --- /dev/null +++ b/test/integration/cpp/infallible_new_false.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +XMALLOC_STR="" +if [ "x${enable_xmalloc}" = "x1" ] ; then + XMALLOC_STR="xmalloc:false," +fi + +export MALLOC_CONF="${XMALLOC_STR}experimental_infallible_new:false" diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp new file mode 100644 index 0000000..9b943bd --- /dev/null +++ b/test/integration/cpp/infallible_new_true.cpp @@ -0,0 +1,61 @@ +#include + +/* + * We can't test C++ in unit tests, and we can't change the safety check failure + * hook in integration tests. So we check that we *actually* abort on failure, + * by forking and checking the child process exit code. + */ + +/* It's a unix system? */ +#ifdef __unix__ +/* I know this! */ +#include +#include +#include +static const bool can_fork = true; +#else +static const bool can_fork = false; +#endif + +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_failing_alloc) { + test_skip_if(!can_fork); +#ifdef __unix__ + pid_t pid = fork(); + expect_d_ne(pid, -1, "Unexpected fork failure"); + if (pid == 0) { + /* + * In the child, we'll print an error message to stderr before + * exiting. Close stderr to avoid spamming output for this + * expected failure. + */ + fclose(stderr); + try { + /* Too big of an allocation to succeed. */ + void *volatile ptr = ::operator new((size_t)-1); + (void)ptr; + } catch (...) { + /* + * Swallow the exception; remember, we expect this to + * fail via an abort within new, not because an + * exception didn't get caught. 
+ */ + } + } else { + int status; + pid_t err = waitpid(pid, &status, 0); + expect_d_ne(-1, err, "waitpid failure"); + expect_false(WIFEXITED(status), + "Should have seen an abnormal failure"); + } +#endif +} +TEST_END + +int +main(void) { + return test( + test_failing_alloc); +} + diff --git a/test/integration/cpp/infallible_new_true.sh b/test/integration/cpp/infallible_new_true.sh new file mode 100644 index 0000000..4a0ff54 --- /dev/null +++ b/test/integration/cpp/infallible_new_true.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +XMALLOC_STR="" +if [ "x${enable_xmalloc}" = "x1" ] ; then + XMALLOC_STR="xmalloc:false," +fi + +export MALLOC_CONF="${XMALLOC_STR}experimental_infallible_new:true" -- cgit v0.12 From de033f56c08745500f98b590f5138ddc4a5c0732 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 May 2021 14:49:55 -0700 Subject: mpsc_queue: Add module. This is a simple multi-producer, single-consumer queue. The intended use case is in the HPA, as we begin supporting hpdatas that move between hpa_shards. We take just a single CAS as the cost to send a message (or a batch of messages) in the low-contention case, and lock-freedom lets us avoid some lock-ordering issues. --- Makefile.in | 1 + include/jemalloc/internal/mpsc_queue.h | 134 +++++++++++++++ test/unit/mpsc_queue.c | 304 +++++++++++++++++++++++++++++++++ 3 files changed, 439 insertions(+) create mode 100644 include/jemalloc/internal/mpsc_queue.h create mode 100644 test/unit/mpsc_queue.c diff --git a/Makefile.in b/Makefile.in index c36b818..ed03d4e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -233,6 +233,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/malloc_conf_2.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mpsc_queue.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h new file mode 100644 index 0000000..316ea9b --- /dev/null +++ b/include/jemalloc/internal/mpsc_queue.h @@ -0,0 +1,134 @@ +#ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H +#define JEMALLOC_INTERNAL_MPSC_QUEUE_H + +#include "jemalloc/internal/atomic.h" + +/* + * A concurrent implementation of a multi-producer, single-consumer queue. It + * supports three concurrent operations: + * - Push + * - Push batch + * - Pop batch + * + * These operations are all lock-free. + * + * The implementation is the simple two-stack queue built on a Treiber stack. + * It's not terribly efficient, but this isn't expected to go into anywhere with + * hot code. In fact, we don't really even need queue semantics in any + * anticipated use cases; we could get away with just the stack. But this way + * lets us frame the API in terms of the existing list types, which is a nice + * convenience. We can save on cache misses by introducing our own (parallel) + * single-linked list type here, and dropping FIFO semantics, if we need this to + * get faster. Since we're currently providing queue semantics though, we use + * the prev field in the link rather than the next field for Treiber-stack + * linkage, so that we can preserve order for bash-pushed lists (recall that the + * two-stack tricks reverses orders in the lock-free first stack). + */ + +#define mpsc_queue(a_type) \ +struct { \ + atomic_p_t tail; \ +} + +#define mpsc_queue_proto(a_attr, a_prefix, a_queue_type, a_type, \ + a_list_type) \ +/* Initialize a queue. */ \ +a_attr void \ +a_prefix##new(a_queue_type *queue); \ +/* Insert all items in src into the queue, clearing src. 
*/ \ +a_attr void \ +a_prefix##push_batch(a_queue_type *queue, a_list_type *src); \ +/* Insert node into the queue. */ \ +a_attr void \ +a_prefix##push(a_queue_type *queue, a_type *node); \ +/* \ + * Pop all items in the queue into the list at dst. dst should already \ + * be initialized (and may contain existing items, which then remain \ + * in dst). \ + */ \ +a_attr void \ +a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst); + +#define mpsc_queue_gen(a_attr, a_prefix, a_queue_type, a_type, \ + a_list_type, a_link) \ +a_attr void \ +a_prefix##new(a_queue_type *queue) { \ + atomic_store_p(&queue->tail, NULL, ATOMIC_RELAXED); \ +} \ +a_attr void \ +a_prefix##push_batch(a_queue_type *queue, a_list_type *src) { \ + /* \ + * Reuse the ql list next field as the Treiber stack next \ + * field. \ + */ \ + a_type *first = ql_first(src); \ + a_type *last = ql_last(src, a_link); \ + void* cur_tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \ + do { \ + /* \ + * Note that this breaks the queue ring structure; \ + * it's not a ring any more! \ + */ \ + first->a_link.qre_prev = cur_tail; \ + /* \ + * Note: the upcoming CAS doesn't need an atomic; every \ + * push only needs to synchronize with the next pop, \ + * which we get from the release sequence rules. \ + */ \ + } while (!atomic_compare_exchange_weak_p(&queue->tail, \ + &cur_tail, last, ATOMIC_RELEASE, ATOMIC_RELAXED)); \ + ql_new(src); \ +} \ +a_attr void \ +a_prefix##push(a_queue_type *queue, a_type *node) { \ + ql_elm_new(node, a_link); \ + a_list_type list; \ + ql_new(&list); \ + ql_head_insert(&list, node, a_link); \ + a_prefix##push_batch(queue, &list); \ +} \ +a_attr void \ +a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) { \ + a_type *tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \ + if (tail == NULL) { \ + /* \ + * In the common special case where there are no \ + * pending elements, bail early without a costly RMW. \ + */ \ + return; \ + } \ + tail = atomic_exchange_p(&queue->tail, NULL, ATOMIC_ACQUIRE); \ + /* \ + * It's a single-consumer queue, so if cur started non-NULL, \ + * it'd better stay non-NULL. \ + */ \ + assert(tail != NULL); \ + /* \ + * We iterate through the stack and both fix up the link \ + * structure (stack insertion broke the list requirement that \ + * the list be circularly linked). It's just as efficient at \ + * this point to make the queue a "real" queue, so do that as \ + * well. \ + * If this ever gets to be a hot spot, we can omit this fixup \ + * and make the queue a bag (i.e. not necessarily ordered), but \ + * that would mean jettisoning the existing list API as the \ + * batch pushing/popping interface. \ + */ \ + a_list_type reversed; \ + ql_new(&reversed); \ + while (tail != NULL) { \ + /* \ + * Pop an item off the stack, prepend it onto the list \ + * (reversing the order). Recall that we use the \ + * list prev field as the Treiber stack next field to \ + * preserve order of batch-pushed items when reversed. 
\ + */ \ + a_type *next = tail->a_link.qre_prev; \ + ql_elm_new(tail, a_link); \ + ql_head_insert(&reversed, tail, a_link); \ + tail = next; \ + } \ + ql_concat(dst, &reversed, a_link); \ +} + +#endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */ diff --git a/test/unit/mpsc_queue.c b/test/unit/mpsc_queue.c new file mode 100644 index 0000000..895edf8 --- /dev/null +++ b/test/unit/mpsc_queue.c @@ -0,0 +1,304 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/mpsc_queue.h" + +typedef struct elem_s elem_t; +typedef ql_head(elem_t) elem_list_t; +typedef mpsc_queue(elem_t) elem_mpsc_queue_t; +struct elem_s { + int thread; + int idx; + ql_elm(elem_t) link; +}; + +/* Include both proto and gen to make sure they match up. */ +mpsc_queue_proto(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, + elem_list_t); +mpsc_queue_gen(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, + elem_list_t, link); + +static void +init_elems_simple(elem_t *elems, int nelems, int thread) { + for (int i = 0; i < nelems; i++) { + elems[i].thread = thread; + elems[i].idx = i; + ql_elm_new(&elems[i], link); + } +} + +static void +check_elems_simple(elem_list_t *list, int nelems, int thread) { + elem_t *elem; + int next_idx = 0; + ql_foreach(elem, list, link) { + expect_d_lt(next_idx, nelems, "Too many list items"); + expect_d_eq(thread, elem->thread, ""); + expect_d_eq(next_idx, elem->idx, "List out of order"); + next_idx++; + } +} + +TEST_BEGIN(test_simple) { + enum {NELEMS = 10}; + elem_t elems[NELEMS]; + elem_list_t list; + elem_mpsc_queue_t queue; + + /* Pop empty queue onto empty list -> empty list */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + elem_mpsc_queue_pop_batch(&queue, &list); + expect_true(ql_empty(&list), ""); + + /* Pop empty queue onto nonempty list -> list unchanged */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS; i++) { + ql_tail_insert(&list, &elems[i], link); + } + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + + /* Pop nonempty queue onto empty list -> list takes queue contents */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[i]); + } + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + + /* Pop nonempty queue onto nonempty list -> list gains queue contents */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS / 2; i++) { + ql_tail_insert(&list, &elems[i], link); + } + for (int i = NELEMS / 2; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[i]); + } + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + +} +TEST_END + +TEST_BEGIN(test_push_single_or_batch) { + enum { + BATCH_MAX = 10, + /* + * We'll push i items one-at-a-time, then i items as a batch, + * then i items as a batch again, as i ranges from 1 to + * BATCH_MAX. So we need 3 times the sum of the numbers from 1 + * to BATCH_MAX elements total. + */ + NELEMS = 3 * BATCH_MAX * (BATCH_MAX - 1) / 2 + }; + elem_t elems[NELEMS]; + init_elems_simple(elems, NELEMS, 0); + elem_list_t list; + ql_new(&list); + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + int next_idx = 0; + for (int i = 1; i < 10; i++) { + /* Push i items 1 at a time. */ + for (int j = 0; j < i; j++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Push i items in batch. 
*/ + for (int j = 0; j < i; j++) { + ql_tail_insert(&list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &list); + expect_true(ql_empty(&list), "Batch push should empty source"); + /* + * Push i items in batch, again. This tests two batches + * proceeding one after the other. + */ + for (int j = 0; j < i; j++) { + ql_tail_insert(&list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &list); + expect_true(ql_empty(&list), "Batch push should empty source"); + } + expect_d_eq(NELEMS, next_idx, "Miscomputed number of elems to push."); + + expect_true(ql_empty(&list), ""); + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); +} +TEST_END + +TEST_BEGIN(test_multi_op) { + enum {NELEMS = 20}; + elem_t elems[NELEMS]; + init_elems_simple(elems, NELEMS, 0); + elem_list_t push_list; + ql_new(&push_list); + elem_list_t result_list; + ql_new(&result_list); + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + + int next_idx = 0; + /* Push first quarter 1-at-a-time. */ + for (int i = 0; i < NELEMS / 4; i++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Push second quarter in batch. */ + for (int i = NELEMS / 4; i < NELEMS / 2; i++) { + ql_tail_insert(&push_list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &push_list); + /* Batch pop all pushed elements. */ + elem_mpsc_queue_pop_batch(&queue, &result_list); + /* Push third quarter in batch. */ + for (int i = NELEMS / 2; i < 3 * NELEMS / 4; i++) { + ql_tail_insert(&push_list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &push_list); + /* Push last quarter one-at-a-time. */ + for (int i = 3 * NELEMS / 4; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Pop them again. Order of existing list should be preserved. */ + elem_mpsc_queue_pop_batch(&queue, &result_list); + + check_elems_simple(&result_list, NELEMS, 0); + +} +TEST_END + +typedef struct pusher_arg_s pusher_arg_t; +struct pusher_arg_s { + elem_mpsc_queue_t *queue; + int thread; + elem_t *elems; + int nelems; +}; + +typedef struct popper_arg_s popper_arg_t; +struct popper_arg_s { + elem_mpsc_queue_t *queue; + int npushers; + int nelems_per_pusher; + int *pusher_counts; +}; + +static void * +thd_pusher(void *void_arg) { + pusher_arg_t *arg = (pusher_arg_t *)void_arg; + int next_idx = 0; + while (next_idx < arg->nelems) { + /* Push 10 items in batch. */ + elem_list_t list; + ql_new(&list); + int limit = next_idx + 10; + while (next_idx < arg->nelems && next_idx < limit) { + ql_tail_insert(&list, &arg->elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(arg->queue, &list); + /* Push 10 items one-at-a-time. 
*/ + limit = next_idx + 10; + while (next_idx < arg->nelems && next_idx < limit) { + elem_mpsc_queue_push(arg->queue, &arg->elems[next_idx]); + next_idx++; + } + + } + return NULL; +} + +static void * +thd_popper(void *void_arg) { + popper_arg_t *arg = (popper_arg_t *)void_arg; + int done_pushers = 0; + while (done_pushers < arg->npushers) { + elem_list_t list; + ql_new(&list); + elem_mpsc_queue_pop_batch(arg->queue, &list); + elem_t *elem; + ql_foreach(elem, &list, link) { + int thread = elem->thread; + int idx = elem->idx; + expect_d_eq(arg->pusher_counts[thread], idx, + "Thread's pushes reordered"); + arg->pusher_counts[thread]++; + if (arg->pusher_counts[thread] + == arg->nelems_per_pusher) { + done_pushers++; + } + } + } + return NULL; +} + +TEST_BEGIN(test_multiple_threads) { + enum { + NPUSHERS = 4, + NELEMS_PER_PUSHER = 1000*1000, + }; + thd_t pushers[NPUSHERS]; + pusher_arg_t pusher_arg[NPUSHERS]; + + thd_t popper; + popper_arg_t popper_arg; + + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + + elem_t *elems = calloc(NPUSHERS * NELEMS_PER_PUSHER, sizeof(elem_t)); + elem_t *elem_iter = elems; + for (int i = 0; i < NPUSHERS; i++) { + pusher_arg[i].queue = &queue; + pusher_arg[i].thread = i; + pusher_arg[i].elems = elem_iter; + pusher_arg[i].nelems = NELEMS_PER_PUSHER; + + init_elems_simple(elem_iter, NELEMS_PER_PUSHER, i); + elem_iter += NELEMS_PER_PUSHER; + } + popper_arg.queue = &queue; + popper_arg.npushers = NPUSHERS; + popper_arg.nelems_per_pusher = NELEMS_PER_PUSHER; + int pusher_counts[NPUSHERS] = {0}; + popper_arg.pusher_counts = pusher_counts; + + thd_create(&popper, thd_popper, (void *)&popper_arg); + for (int i = 0; i < NPUSHERS; i++) { + thd_create(&pushers[i], thd_pusher, &pusher_arg[i]); + } + + thd_join(popper, NULL); + for (int i = 0; i < NPUSHERS; i++) { + thd_join(pushers[i], NULL); + } + + for (int i = 0; i < NPUSHERS; i++) { + expect_d_eq(NELEMS_PER_PUSHER, pusher_counts[i], ""); + } + + free(elems); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_simple, + test_push_single_or_batch, + test_multi_op, + test_multiple_threads); +} -- cgit v0.12 From d202218e865a14d8fcff5c41682719a07434518c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 17:14:43 -0700 Subject: HPA: Fix typos with big performance implications. This fixes two simple but significant typos in the HPA: - The conf string parsing accidentally set a min value of PAGE for hpa_sec_batch_fill_extra; i.e. allocating 4096 extra pages every time we attempted to allocate a single page. This puts us over the SEC flush limit, so we then immediately flush all but one of them (probably triggering purging). - The HPA was using the default PAI batch alloc implementation, which meant it did not actually get any locking advantages. This snuck by because I did all the performance testing without using the PAI interface or config settings. When I cleaned it up and put everything behind nice interfaces, I only did correctness checks, and didn't try any performance ones. --- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 22cf007..8ef881f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -92,7 +92,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * operating on corrupted data. 
*/ shard->pai.alloc = &hpa_alloc; - shard->pai.alloc_batch = &pai_alloc_batch_default; + shard->pai.alloc_batch = &hpa_alloc_batch; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; diff --git a/src/jemalloc.c b/src/jemalloc.c index c70244d..6ff9f97 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1489,8 +1489,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_bytes_after_flush", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); + "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { -- cgit v0.12 From 9c42ed2d1491451dcc8cdb429ecf9ee46070054d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Jul 2021 15:16:38 -0700 Subject: Travis: Don't test "clang" on OS X. On OS X, "gcc" is really just clang anyways, so this combination gets tested by the gcc test. This is purely redundant, and (since it runs early in the output) increases time to signal for real breakages further down in the list. --- .travis.yml | 3 --- scripts/gen_travis.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6aea058..5cf0e08 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,9 +55,6 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 992bf00..fe9d840 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -87,6 +87,9 @@ def format_job(combination): 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ in malloc_conf): return "" + # gcc is just a redirect to clang on OS X. No need to test both. + if os == 'osx' and compilers_unusual in combination: + return "" if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) -- cgit v0.12 From 347523517bb90210ffeadf115730003531645394 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 8 Jul 2021 10:38:45 -0700 Subject: PAI: Fix a typo. --- include/jemalloc/internal/pai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 16e022d..4d3a9e0 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -61,7 +61,7 @@ pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { static inline void pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { - return self->dalloc_batch(tsdn, self, list); + self->dalloc_batch(tsdn, self, list); } /* -- cgit v0.12 From 41fd56605e95c40650ab1d012b5e09c273b19490 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 13:29:02 -0700 Subject: HPA: Purge across retained extents. This lets us cut down on the number of expensive system calls we perform. 
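To illustrate the coalescing policy this change introduces (worked out in
hpdata_purge_begin() below), here is a minimal standalone sketch. It is not
the jemalloc implementation: it ignores the fb_* bitmaps, and the
page_state_t enum and coalesce_purge() helper are hypothetical names used
only for this example. A purge range starts at a dirty page and may run
across retained pages, but never across an active one, and it is trimmed so
that it never extends past the last dirty page it covers.

#include <stddef.h>
#include <stdio.h>

typedef enum { PAGE_ACTIVE, PAGE_DIRTY, PAGE_RETAINED } page_state_t;

/* Report maximal purgeable ranges over an array of per-page states. */
static void
coalesce_purge(const page_state_t *pages, size_t npages) {
	size_t i = 0;
	while (i < npages) {
		/* Find the next dirty page; it starts a purge range. */
		while (i < npages && pages[i] != PAGE_DIRTY) {
			i++;
		}
		if (i == npages) {
			break;
		}
		size_t begin = i;
		size_t last_dirty = i;
		/* Extend across dirty and retained pages; stop at active. */
		while (i < npages && pages[i] != PAGE_ACTIVE) {
			if (pages[i] == PAGE_DIRTY) {
				last_dirty = i;
			}
			i++;
		}
		/* Trim trailing retained pages past the last dirty one. */
		printf("purge pages [%zu, %zu]\n", begin, last_dirty);
	}
}

int
main(void) {
	page_state_t pages[] = {PAGE_ACTIVE, PAGE_DIRTY, PAGE_RETAINED,
	    PAGE_DIRTY, PAGE_ACTIVE, PAGE_DIRTY};
	coalesce_purge(pages, sizeof(pages) / sizeof(pages[0]));
	return 0;
}

On this input the sketch prints "purge pages [1, 3]" and "purge pages [5, 5]":
the two dirty extents separated by a retained page collapse into one purge
call, which is where the reduction in expensive system calls comes from.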
--- include/jemalloc/internal/hpdata.h | 5 +- src/hpdata.c | 97 +++++++++++++++++++++++++++++++------- test/unit/hpdata.c | 81 ++++++++++++++++++++++++++++--- test/unit/psset.c | 2 + 4 files changed, 160 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 4ff2e57..32e2624 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -110,7 +110,7 @@ struct hpdata_s { */ size_t h_ntouched; - /* The dirty pages (using the same definition as above). */ + /* The touched pages (using the same definition as above). */ fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; @@ -356,6 +356,7 @@ void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); typedef struct hpdata_purge_state_s hpdata_purge_state_t; struct hpdata_purge_state_s { size_t npurged; + size_t ndirty_to_purge; fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; size_t next_purge_search_begin; }; @@ -372,7 +373,7 @@ struct hpdata_purge_state_s { * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. * - * Returns the number of pages that will be purged. + * Returns the number of dirty pages that will be purged. */ size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); diff --git a/src/hpdata.c b/src/hpdata.c index b861e9e..18519be 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -166,33 +166,93 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - /* See the comment in reserve. */ + /* + * See the comment below; we might purge any inactive extent, so it's + * unsafe for any other thread to turn any inactive extent active while + * we're operating on it. + */ + assert(!hpdata_alloc_allowed_get(hpdata)); purge_state->npurged = 0; purge_state->next_purge_search_begin = 0; /* - * Initialize to_purge with everything that's not active but that is - * dirty. + * Initialize to_purge. + * + * It's possible to end up in situations where two dirty extents are + * separated by a retained extent: + * - 1 page allocated. + * - 1 page allocated. + * - 1 pages allocated. * - * As an optimization, we could note that in practice we never allocate - * out of a hugepage while purging within it, and so could try to - * combine dirty extents separated by a non-dirty but non-active extent - * to avoid purge calls. This does nontrivially complicate metadata - * tracking though, so let's hold off for now. + * If the middle page is freed and purged, and then the first and third + * pages are freed, and then another purge pass happens, the hpdata + * looks like this: + * - 1 page dirty. + * - 1 page retained. + * - 1 page dirty. + * + * But it's safe to do a single 3-page purge. + * + * We do this by first computing the dirty pages, and then filling in + * any gaps by extending each range in the dirty bitmap to extend until + * the next active page. This purges more pages, but the expensive part + * of purging is the TLB shootdowns, rather than the kernel state + * tracking; doing a little bit more of the latter is fine if it saves + * us from doing some of the former. + */ + + /* + * The dirty pages are those that are touched but not active. Note that + * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the + * fb_group_t is 64 bits, so this is 64 bytes, spread across 8 + * fb_group_ts. 
*/ - fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); - fb_bit_and(purge_state->to_purge, purge_state->to_purge, - hpdata->touched_pages, HUGEPAGE_PAGES); + fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + fb_init(dirty_pages, HUGEPAGE_PAGES); + fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES); + fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages, + HUGEPAGE_PAGES); + + fb_init(purge_state->to_purge, HUGEPAGE_PAGES); + size_t next_bit = 0; + while (next_bit < HUGEPAGE_PAGES) { + size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, + next_bit); + /* Recall that fb_ffs returns nbits if no set bit is found. */ + if (next_dirty == HUGEPAGE_PAGES) { + break; + } + size_t next_active = fb_ffs(hpdata->active_pages, + HUGEPAGE_PAGES, next_dirty); + /* + * Don't purge past the end of the dirty extent, into retained + * pages. This helps the kernel a tiny bit, but honestly it's + * mostly helpful for testing (where we tend to write test cases + * that think in terms of the dirty ranges). + */ + ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES, + next_active - 1); + assert(last_dirty >= 0); + assert((size_t)last_dirty >= next_dirty); + assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES); + + fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty, + last_dirty - next_dirty + 1); + next_bit = next_active + 1; + } - /* We purge everything we can. */ - size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive; - assert(to_purge == fb_scount( + /* We should purge, at least, everything dirty. */ + size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive; + purge_state->ndirty_to_purge = ndirty; + assert(ndirty <= fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, + HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); - return to_purge; + return ndirty; } bool @@ -203,6 +263,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * hpdata without synchronization, and therefore have no right to expect * a consistent state. */ + assert(!hpdata_alloc_allowed_get(hpdata)); if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { return false; @@ -228,19 +289,21 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { + assert(!hpdata_alloc_allowed_get(hpdata)); hpdata_assert_consistent(hpdata); /* See the comment in reserve. 
*/ assert(!hpdata->h_in_psset || hpdata->h_updating); assert(purge_state->npurged == fb_scount(purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(purge_state->npurged >= purge_state->ndirty_to_purge); fb_bit_not(purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); - assert(hpdata->h_ntouched >= purge_state->npurged); - hpdata->h_ntouched -= purge_state->npurged; + assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge); + hpdata->h_ntouched -= purge_state->ndirty_to_purge; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 11bccc5..288e71d 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -67,6 +67,7 @@ TEST_BEGIN(test_purge_simple) { expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); @@ -90,11 +91,9 @@ TEST_BEGIN(test_purge_simple) { TEST_END /* - * We only test intervening dalloc's not intervening allocs; we don't need - * intervening allocs, and foreseeable optimizations will make them not just - * unnecessary but incorrect. In particular, if there are two dirty extents - * separated only by a retained extent, we can just purge the entire range, - * saving a purge call. + * We only test intervening dalloc's not intervening allocs; the latter are + * disallowed as a purging precondition (because they interfere with purging + * across a retained extent, saving a purge call). */ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; @@ -112,6 +111,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); @@ -137,7 +137,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_ptr_eq( (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), purge_addr, ""); - expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); @@ -150,6 +150,74 @@ TEST_BEGIN(test_purge_intervening_dalloc) { } TEST_END +TEST_BEGIN(test_purge_over_retained) { + void *purge_addr; + size_t purge_size; + + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocate the first 3/4 of the pages. */ + void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Free the second quarter. */ + void *second_quarter = + (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + + /* Purge the second quarter. 
*/ + hpdata_alloc_allowed_set(&hpdata, false); + hpdata_purge_state_t purge_state; + size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); + + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(second_quarter, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + + /* Free the first and third quarter. */ + hpdata_unreserve(&hpdata, HPDATA_ADDR, HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + /* + * Purge again. The second quarter is retained, so we can safely + * re-purge it. We expect a single purge of 3/4 of the hugepage, + * purging half its pages. + */ + to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, ""); +} +TEST_END + TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); @@ -171,5 +239,6 @@ int main(void) { test_reserve_alloc, test_purge_simple, test_purge_intervening_dalloc, + test_purge_over_retained, test_hugify); } diff --git a/test/unit/psset.c b/test/unit/psset.c index fde403e..7bce7c1 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -18,12 +18,14 @@ edata_init_test(edata_t *edata) { static void test_psset_fake_purge(hpdata_t *ps) { hpdata_purge_state_t purge_state; + hpdata_alloc_allowed_set(ps, false); hpdata_purge_begin(ps, &purge_state); void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { } hpdata_purge_end(ps, &purge_state); + hpdata_alloc_allowed_set(ps, true); } static void -- cgit v0.12 From 47d8a7e6b04a81f2938f1b18f66cb468870fa442 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 16:21:29 -0700 Subject: psset: Purge empty slabs first. These are particularly good candidates for purging (listed in the diff). --- include/jemalloc/internal/psset.h | 9 ++- src/psset.c | 29 ++++++++-- test/unit/psset.c | 112 +++++++++++++++++++++++++++++++++++++- 3 files changed, 143 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 96fb300..e1d6497 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -25,6 +25,9 @@ * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + * 1). This lets us implement a preference for purging non-hugified hpdatas * among similarly-dirty ones. 
+ * We reserve the last two indices for empty slabs, in that case purging + * hugified ones (which are definitionally all waste) before non-hugified ones + * (i.e. reversing the order). */ #define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES) @@ -78,7 +81,11 @@ struct psset_s { * allocations. */ hpdata_empty_list_t empty; - /* Slabs which are available to be purged, ordered by purge level. */ + /* + * Slabs which are available to be purged, ordered by how much we want + * to purge them (with later indices indicating slabs we want to purge + * more). + */ hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS]; /* Bitmap for which set bits correspond to non-empty purge lists. */ fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)]; diff --git a/src/psset.c b/src/psset.c index 5978202..9a8f054 100644 --- a/src/psset.c +++ b/src/psset.c @@ -201,11 +201,32 @@ psset_purge_list_ind(hpdata_t *ps) { size_t ndirty = hpdata_ndirty_get(ps); /* Shouldn't have something with no dirty pages purgeable. */ assert(ndirty > 0); + /* + * Higher indices correspond to lists we'd like to purge earlier; make + * the two highest indices correspond to empty lists, which we attempt + * to purge before purging any non-empty list. This has two advantages: + * - Empty page slabs are the least likely to get reused (we'll only + * pick them for an allocation if we have no other choice). + * - Empty page slabs can purge every dirty page they contain in a + * single call, which is not usually the case. + * + * We purge hugeified empty slabs before nonhugeified ones, on the basis + * that they are fully dirty, while nonhugified slabs might not be, so + * we free up more pages more easily. + */ + if (hpdata_nactive_get(ps) == 0) { + if (hpdata_huge_get(ps)) { + return PSSET_NPURGE_LISTS - 1; + } else { + return PSSET_NPURGE_LISTS - 2; + } + } + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); /* - * Higher indices correspond to lists we'd like to purge earlier; - * increment the index for the nonhugified hpdatas first, so that we'll - * pick them before picking hugified ones. + * For non-empty slabs, we may reuse them again. Prefer purging + * non-hugeified slabs before hugeified ones then, among pages of + * similar dirtiness. We still get some benefit from the hugification. */ return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); } @@ -321,7 +342,7 @@ psset_pick_purge(psset_t *psset) { return NULL; } pszind_t ind = (pszind_t)ind_ssz; - assert(ind < PSSET_NPSIZES); + assert(ind < PSSET_NPURGE_LISTS); hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); assert(ps != NULL); return ps; diff --git a/test/unit/psset.c b/test/unit/psset.c index 7bce7c1..6ff7201 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -545,7 +545,7 @@ TEST_END TEST_BEGIN(test_purge_prefers_nonhuge) { /* * All else being equal, we should prefer purging non-huge pages over - * huge ones. + * huge ones for non-empty extents. */ /* Nothing magic about this constant. 
*/ @@ -625,6 +625,112 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { } TEST_END +TEST_BEGIN(test_purge_prefers_empty) { + void *ptr; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_empty; + hpdata_t hpdata_nonempty; + hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123); + psset_insert(&psset, &hpdata_empty); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456); + psset_insert(&psset, &hpdata_nonempty); + + psset_update_begin(&psset, &hpdata_empty); + ptr = hpdata_reserve_alloc(&hpdata_empty, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty), ptr, ""); + hpdata_unreserve(&hpdata_empty, ptr, PAGE); + hpdata_purge_allowed_set(&hpdata_empty, true); + psset_update_end(&psset, &hpdata_empty); + + psset_update_begin(&psset, &hpdata_nonempty); + ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, ""); + hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE); + hpdata_purge_allowed_set(&hpdata_nonempty, true); + psset_update_end(&psset, &hpdata_nonempty); + + /* + * The nonempty slab has 9 dirty pages, while the empty one has only 1. + * We should still pick the empty one for purging. + */ + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_empty, to_purge, ""); +} +TEST_END + +TEST_BEGIN(test_purge_prefers_empty_huge) { + void *ptr; + + psset_t psset; + psset_init(&psset); + + enum {NHP = 10 }; + + hpdata_t hpdata_huge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; + + uintptr_t cur_addr = 100 * HUGEPAGE; + uint64_t cur_age = 123; + for (int i = 0; i < NHP; i++) { + hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); + cur_addr += HUGEPAGE; + cur_age++; + psset_insert(&psset, &hpdata_huge[i]); + + hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age); + cur_addr += HUGEPAGE; + cur_age++; + psset_insert(&psset, &hpdata_nonhuge[i]); + + /* + * Make the hpdata_huge[i] fully dirty, empty, purgable, and + * huge. + */ + psset_update_begin(&psset, &hpdata_huge[i]); + ptr = hpdata_reserve_alloc(&hpdata_huge[i], HUGEPAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_huge[i]), ptr, ""); + hpdata_hugify(&hpdata_huge[i]); + hpdata_unreserve(&hpdata_huge[i], ptr, HUGEPAGE); + hpdata_purge_allowed_set(&hpdata_huge[i], true); + psset_update_end(&psset, &hpdata_huge[i]); + + /* + * Make hpdata_nonhuge[i] fully dirty, empty, purgable, and + * non-huge. + */ + psset_update_begin(&psset, &hpdata_nonhuge[i]); + ptr = hpdata_reserve_alloc(&hpdata_nonhuge[i], HUGEPAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonhuge[i]), ptr, ""); + hpdata_unreserve(&hpdata_nonhuge[i], ptr, HUGEPAGE); + hpdata_purge_allowed_set(&hpdata_nonhuge[i], true); + psset_update_end(&psset, &hpdata_nonhuge[i]); + } + + /* + * We have a bunch of empty slabs, half huge, half nonhuge, inserted in + * alternating order. We should pop all the huge ones before popping + * any of the non-huge ones for purging. 
+ */ + for (int i = 0; i < NHP; i++) { + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_huge[i], to_purge, ""); + psset_update_begin(&psset, to_purge); + hpdata_purge_allowed_set(to_purge, false); + psset_update_end(&psset, to_purge); + } + for (int i = 0; i < NHP; i++) { + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_nonhuge[i], to_purge, ""); + psset_update_begin(&psset, to_purge); + hpdata_purge_allowed_set(to_purge, false); + psset_update_end(&psset, to_purge); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -636,5 +742,7 @@ main(void) { test_stats, test_oldest_fit, test_insert_remove, - test_purge_prefers_nonhuge); + test_purge_prefers_nonhuge, + test_purge_prefers_empty, + test_purge_prefers_empty_huge); } -- cgit v0.12 From ace329d11bc397444e99ff81ff4b8d2ca26cc21c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Jun 2021 13:52:28 -0700 Subject: HPA batch dalloc: Just do one deferred work check. We only need to do one check per batch dalloc, not one check per dalloc in the batch. --- src/hpa.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 8ef881f..ba02f79 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -723,17 +723,6 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { hpdata_unreserve(ps, unreserve_addr, unreserve_size); hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); - hpa_do_deferred_work(tsdn, shard); -} - -static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - hpa_shard_t *shard = hpa_from_pai(self); - - hpa_dalloc_prepare_unlocked(tsdn, shard, edata); - malloc_mutex_lock(tsdn, &shard->mtx); - hpa_dalloc_locked(tsdn, shard, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); } static void @@ -751,9 +740,19 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { edata_list_active_remove(list, edata); hpa_dalloc_locked(tsdn, shard, edata); } + hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); } +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + /* Just a dalloc_batch of size 1; this lets us share logic. */ + edata_list_active_t dalloc_list; + edata_list_active_init(&dalloc_list); + edata_list_active_append(&dalloc_list, edata); + hpa_dalloc_batch(tsdn, self, &dalloc_list); +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); -- cgit v0.12 From 583284f2d91f79b0174ee23e1b4d946b63845246 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Jun 2021 16:07:27 -0700 Subject: Add HPA deferral functionality. 
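
The shape of the change, reduced to a standalone sketch (shard_t, do_some_work, and the constants below are invented stand-ins, not the jemalloc internals): PAI operations may defer cleanup work when a background thread is expected to pick it up, explicit calls force the work, and switching deferral off drains whatever was queued so nothing is stranded.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct shard_s {
	bool deferral_allowed; /* true iff a background thread will call us. */
	size_t pending;        /* stand-in for outstanding purge/hugify work. */
} shard_t;

/* One unit of cleanup work; returns true if anything was done. */
static bool
do_some_work(shard_t *shard) {
	if (shard->pending == 0) {
		return false;
	}
	shard->pending--;
	return true;
}

/*
 * forced is true on explicit deferred-work calls (background thread, or
 * deferral being switched off); false on the alloc/dalloc fast path.
 */
static void
shard_maybe_do_deferred_work(shard_t *shard, bool forced) {
	if (!forced && shard->deferral_allowed) {
		return; /* A background thread will pick this up later. */
	}
	/* Inline callers get a small latency bound; forced callers drain. */
	size_t max_ops = forced ? (size_t)-1 : 8;
	for (size_t nops = 0; nops < max_ops; nops++) {
		if (!do_some_work(shard)) {
			break;
		}
	}
}

static void
shard_set_deferral_allowed(shard_t *shard, bool deferral_allowed) {
	bool previously_allowed = shard->deferral_allowed;
	shard->deferral_allowed = deferral_allowed;
	if (previously_allowed && !deferral_allowed) {
		shard_maybe_do_deferred_work(shard, /* forced */ true);
	}
}

int
main(void) {
	shard_t shard = { /* deferral_allowed */ true, /* pending */ 3 };
	/* On the alloc/dalloc path nothing happens: work is deferred. */
	shard_maybe_do_deferred_work(&shard, /* forced */ false);
	printf("pending after inline call: %zu\n", shard.pending);
	/* Disabling background threads must not strand queued work. */
	shard_set_deferral_allowed(&shard, false);
	printf("pending after disabling deferral: %zu\n", shard.pending);
	return 0;
}

Forced callers get an effectively unbounded budget because they run off the hot path, while inline callers keep a small fixed bound so allocation latency stays predictable.
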
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/hpa.h | 5 ++++ include/jemalloc/internal/hpa_opts.h | 18 +++++++++++- src/arena.c | 10 ++++++- src/background_thread.c | 2 +- src/hpa.c | 46 ++++++++++++++++++++++++++----- src/jemalloc.c | 6 ++-- 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 360653f..bb3462f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -51,6 +51,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 778d1c9..27adefc 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -55,6 +55,7 @@ struct hpa_shard_s { malloc_mutex_t mtx; /* The base metadata allocator. */ base_t *base; + /* * This edata cache is the one we use when allocating a small extent * from a pageslab. The pageslab itself comes from the centralized @@ -122,6 +123,10 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, + bool deferral_allowed); +void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); + /* * We share the fork ordering with the PA and arena prefork handling; that's why * these are 3 and 4 rather than 0 and 1. diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 5ff0072..ef16219 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -32,6 +32,14 @@ struct hpa_shard_opts_s { * active_pages. This may be set to (fxp_t)-1 to disable purging. */ fxp_t dirty_mult; + + /* + * Whether or not the PAI methods are allowed to defer work to a + * subsequent hpa_shard_do_deferred_work() call. Practically, this + * corresponds to background threads being enabled. We track this + * ourselves for encapsulation purposes. + */ + bool deferral_allowed; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -42,7 +50,15 @@ struct hpa_shard_opts_s { /* dehugification_threshold */ \ HUGEPAGE * 20 / 100, \ /* dirty_mult */ \ - FXP_INIT_PERCENT(25) \ + FXP_INIT_PERCENT(25), \ + /* \ + * deferral_allowed \ + * \ + * Really, this is always set by the arena during creation \ + * or by an hpa_shard_set_deferral_allowed call, so the value \ + * we put here doesn't matter. \ + */ \ + false \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/arena.c b/src/arena.c index 3ff9157..bdc120f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -461,6 +461,12 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +/* Called from background threads. 
*/ +void +arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { + arena_decay(tsdn, arena, true, false); +} + void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { bool generated_dirty; @@ -1565,7 +1571,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &opt_hpa_opts, + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } diff --git a/src/background_thread.c b/src/background_thread.c index 7302a30..edcf786 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -291,7 +291,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne if (!arena) { continue; } - arena_decay(tsdn, arena, true, false); + arena_do_deferred_work(tsdn, arena); if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. */ continue; diff --git a/src/hpa.c b/src/hpa.c index ba02f79..ee25e94 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -426,17 +426,29 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { return true; } +/* + * Execution of deferred work is forced if it's triggered by an explicit + * hpa_shard_do_deferred_work() call. + */ static void -hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { +hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, + bool forced) { bool hugified; bool purged; size_t nloop = 0; - /* Just *some* bound, to impose a worst-case latency bound. */ - size_t maxloops = 100;; + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!forced && shard->opts.deferral_allowed) { + return; + } + /* + * If we're on a background thread, do work so long as there's work to + * be done. Otherwise, bound latency to not be *too* bad by doing at + * most a small fixed number of operations. + */ + size_t maxloops = (forced ? 
(size_t)-1 : 8); do { - malloc_mutex_assert_owner(tsdn, &shard->mtx); hugified = hpa_try_hugify(tsdn, shard); - + malloc_mutex_assert_owner(tsdn, &shard->mtx); purged = false; if (hpa_should_purge(tsdn, shard)) { purged = hpa_try_purge(tsdn, shard); @@ -528,7 +540,7 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, edata_list_active_append(results, edata); } - hpa_do_deferred_work(tsdn, shard); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } @@ -740,7 +752,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { edata_list_active_remove(list, edata); hpa_dalloc_locked(tsdn, shard, edata); } - hpa_do_deferred_work(tsdn, shard); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); malloc_mutex_unlock(tsdn, &shard->mtx); } @@ -801,6 +813,26 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } void +hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, + bool deferral_allowed) { + malloc_mutex_lock(tsdn, &shard->mtx); + bool deferral_previously_allowed = shard->opts.deferral_allowed; + shard->opts.deferral_allowed = deferral_allowed; + if (deferral_previously_allowed && !deferral_allowed) { + hpa_shard_maybe_do_deferred_work(tsdn, shard, + /* forced */ true); + } + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +void +hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_lock(tsdn, &shard->mtx); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->grow_mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6ff9f97..85d6863 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1788,8 +1788,10 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, &opt_hpa_opts, - &opt_hpa_sec_opts)) { + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } -- cgit v0.12 From 1d4a7666d558b2c21e8cfc2b3e8981020db072fa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 7 Jun 2021 11:45:57 -0700 Subject: HPA: Do deferred operations on background threads. 
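
The sleep-interval computation added below reduces to taking the minimum of the decay-derived interval and the HPA cap, floored at the background thread's minimum wakeup interval. A standalone sketch of that arithmetic (the floor constant here is illustrative, not the real BACKGROUND_THREAD_MIN_INTERVAL_NS; a negative cap means the HPA interval is disabled):

#include <stdint.h>
#include <stdio.h>

#define MIN_INTERVAL_NS ((uint64_t)10 * 1000 * 1000) /* illustrative floor */

static uint64_t
compute_sleep_ns(uint64_t decay_interval_ns, int64_t hpa_interval_max_ms) {
	uint64_t min_so_far = decay_interval_ns;
	if (hpa_interval_max_ms >= 0) {
		uint64_t hpa_ns = 1000 * 1000 * (uint64_t)hpa_interval_max_ms;
		if (hpa_ns < min_so_far) {
			/* Never wake up more often than the global floor. */
			min_so_far = hpa_ns < MIN_INTERVAL_NS
			    ? MIN_INTERVAL_NS : hpa_ns;
		}
	}
	return min_so_far;
}

int
main(void) {
	/* Decay alone would sleep 10s; a 50ms HPA cap wins. */
	printf("%llu ns\n", (unsigned long long)compute_sleep_ns(
	    10ULL * 1000 * 1000 * 1000, 50));
	/* A cap of -1 leaves the decay-derived interval untouched. */
	printf("%llu ns\n", (unsigned long long)compute_sleep_ns(
	    10ULL * 1000 * 1000 * 1000, -1));
	return 0;
}
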
--- Makefile.in | 1 + .../jemalloc/internal/background_thread_externs.h | 1 + .../jemalloc/internal/background_thread_structs.h | 8 ++ include/jemalloc/internal/pa.h | 12 +- src/arena.c | 1 + src/background_thread.c | 51 ++++++- src/ctl.c | 5 + src/jemalloc.c | 9 ++ src/pa.c | 12 ++ src/stats.c | 1 + test/unit/hpa_background_thread.c | 158 +++++++++++++++++++++ test/unit/hpa_background_thread.sh | 4 + 12 files changed, 256 insertions(+), 7 deletions(-) create mode 100644 test/unit/hpa_background_thread.c create mode 100644 test/unit/hpa_background_thread.sh diff --git a/Makefile.in b/Makefile.in index ed03d4e..3e7d122 100644 --- a/Makefile.in +++ b/Makefile.in @@ -221,6 +221,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index d5c1369..bc49bea 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; +extern ssize_t opt_background_thread_hpa_interval_max_ms; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 249115c..cc14dde 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -11,6 +11,14 @@ #define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT #define DEFAULT_NUM_BACKGROUND_THREAD 4 +/* + * These exist only as a transitional state. Eventually, deferral should be + * part of the PAI, and each implementation can indicate wait times with more + * specificity. + */ +#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2) +#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 + typedef enum { background_thread_stopped, background_thread_started, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index cb9f8cf..0fb7725 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -172,11 +172,21 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, */ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); - bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); +/* + * Do deferred work on this PA shard. + * + * Morally, this should do both PAC decay and the HPA deferred work. For now, + * though, the arena, background thread, and PAC modules are tightly interwoven + * in a way that's tricky to extricate, so we only do the HPA-specific parts. 
+ */ +void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, + bool deferral_allowed); +void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/src/arena.c b/src/arena.c index bdc120f..d6a1f67 100644 --- a/src/arena.c +++ b/src/arena.c @@ -465,6 +465,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { arena_decay(tsdn, arena, true, false); + pa_shard_do_deferred_work(tsdn, &arena->pa_shard); } void diff --git a/src/background_thread.c b/src/background_thread.c index edcf786..1fb24fe 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -13,6 +13,13 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; +/* + * This is disabled (and set to -1) if the HPA is. If the HPA is enabled, + * malloc_conf initialization sets it to + * BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED. + */ +ssize_t opt_background_thread_hpa_interval_max_ms = + BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; @@ -209,7 +216,20 @@ arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); - return i1 < i2 ? i1 : i2; + uint64_t min_so_far = i1 < i2 ? i1 : i2; + if (opt_background_thread_hpa_interval_max_ms >= 0) { + uint64_t hpa_interval = 1000 * 1000 * + (uint64_t)opt_background_thread_hpa_interval_max_ms; + if (hpa_interval < min_so_far) { + if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; + } else { + min_so_far = hpa_interval; + } + } + } + + return min_so_far; } static void @@ -607,16 +627,16 @@ background_threads_enable(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); VARIABLE_ARRAY(bool, marked, max_background_threads); - unsigned i, nmarked; - for (i = 0; i < max_background_threads; i++) { + unsigned nmarked; + for (unsigned i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; /* Thread 0 is required and created at the end. */ marked[0] = true; /* Mark the threads we need to create for thread 0. 
*/ - unsigned n = narenas_total_get(); - for (i = 1; i < n; i++) { + unsigned narenas = narenas_total_get(); + for (unsigned i = 1; i < narenas; i++) { if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; @@ -633,7 +653,18 @@ background_threads_enable(tsd_t *tsd) { } } - return background_thread_create_locked(tsd, 0); + bool err = background_thread_create_locked(tsd, 0); + if (err) { + return true; + } + for (unsigned i = 0; i < narenas; i++) { + arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); + if (arena != NULL) { + pa_shard_set_deferral_allowed(tsd_tsdn(tsd), + &arena->pa_shard, true); + } + } + return false; } bool @@ -647,6 +678,14 @@ background_threads_disable(tsd_t *tsd) { return true; } assert(n_background_threads == 0); + unsigned narenas = narenas_total_get(); + for (unsigned i = 0; i < narenas; i++) { + arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); + if (arena != NULL) { + pa_shard_set_deferral_allowed(tsd_tsdn(tsd), + &arena->pa_shard, false); + } + } return false; } diff --git a/src/ctl.c b/src/ctl.c index c713f0e..c66b4d8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -111,6 +111,7 @@ CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_max_background_threads) +CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -423,6 +424,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, + {NAME("background_thread_hpa_interval_max_ms"), + CTL(opt_background_thread_hpa_interval_max_ms)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -2139,6 +2142,8 @@ CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) +CTL_RO_NL_GEN(opt_background_thread_hpa_interval_max_ms, + opt_background_thread_hpa_interval_max_ms, ssize_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 85d6863..28c7fdc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1410,6 +1410,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") + CONF_HANDLE_SSIZE_T( + opt_background_thread_hpa_interval_max_ms, + "background_thread_hpa_interval_max_ms", -1, + SSIZE_MAX) CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); @@ -1659,6 +1663,11 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (opt_hpa && opt_background_thread_hpa_interval_max_ms + == BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED) { + opt_background_thread_hpa_interval_max_ms = + BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED; + } } #undef 
MALLOC_CONF_NSOURCES diff --git a/src/pa.c b/src/pa.c index cb3b3df..cbc8f76 100644 --- a/src/pa.c +++ b/src/pa.c @@ -208,3 +208,15 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { return pac_decay_ms_get(&shard->pac, state); } + +void +pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, + bool deferral_allowed) { + hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, + deferral_allowed); +} + +void +pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { + hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); +} diff --git a/src/stats.c b/src/stats.c index 34cae0a..4e6c392 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1494,6 +1494,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") OPT_WRITE_SIZE_T("lg_extent_max_active_fit") diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c new file mode 100644 index 0000000..1907a6d --- /dev/null +++ b/test/unit/hpa_background_thread.c @@ -0,0 +1,158 @@ +#include "test/jemalloc_test.h" +#include "test/sleep.h" + +static void +sleep_for_background_thread_interval() { + /* + * The sleep interval set in our .sh file is 50ms. So it should + * definitely run if we sleep for for times that. + */ + sleep_ns(200 * 1000 * 1000); +} + +static unsigned +create_arena() { + unsigned arena_ind; + size_t sz; + + sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2), + 0, "Unexpected mallctl() failure"); + return arena_ind; +} + +static size_t +get_empty_ndirty(unsigned arena_ind) { + int err; + size_t ndirty_huge; + size_t ndirty_nonhuge; + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch, + sizeof(epoch)); + expect_d_eq(0, err, "Unexpected mallctl() failure"); + + size_t mib[6]; + size_t miblen = sizeof(mib)/sizeof(mib[0]); + err = mallctlnametomib( + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib, + &miblen); + expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); + + sz = sizeof(ndirty_nonhuge); + mib[2] = arena_ind; + err = mallctlbymib(mib, miblen, &ndirty_nonhuge, &sz, NULL, 0); + expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); + + err = mallctlnametomib( + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, + &miblen); + expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); + + sz = sizeof(ndirty_huge); + mib[2] = arena_ind; + err = mallctlbymib(mib, miblen, &ndirty_huge, &sz, NULL, 0); + expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); + + return ndirty_huge + ndirty_nonhuge; +} + +static void +set_background_thread_enabled(bool enabled) { + int err; + err = je_mallctl("background_thread", NULL, NULL, &enabled, + sizeof(enabled)); + expect_d_eq(0, err, "Unexpected mallctl failure"); +} + +static void +expect_purging(unsigned arena_ind, bool expect_deferred) { + size_t empty_ndirty; + + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); + + /* + * It's possible that we get unlucky with our stats collection timing, + * and the background thread runs in between the deallocation and the + * stats collection. 
So we retry 10 times, and see if we *ever* see + * deferred reclamation. + */ + bool observed_dirty_page = false; + for (int i = 0; i < 10; i++) { + void *ptr = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "All pages should be active"); + dallocx(ptr, MALLOCX_TCACHE_NONE); + empty_ndirty = get_empty_ndirty(arena_ind); + if (expect_deferred) { + expect_true(empty_ndirty == 0 || empty_ndirty == 1, + "Unexpected extra dirty page count: %zu", + empty_ndirty); + } else { + assert_zu_eq(0, empty_ndirty, + "Saw dirty pages without deferred purging"); + } + if (empty_ndirty > 0) { + observed_dirty_page = true; + break; + } + } + expect_b_eq(expect_deferred, observed_dirty_page, ""); + if (expect_deferred) { + sleep_for_background_thread_interval(); + } + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Should have seen a background purge"); +} + +TEST_BEGIN(test_hpa_background_thread_purges) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + + unsigned arena_ind = create_arena(); + /* + * Our .sh sets dirty mult to 0, so all dirty pages should get purged + * any time any thread frees. + */ + expect_purging(arena_ind, /* expect_deferred */ true); +} +TEST_END + +TEST_BEGIN(test_hpa_background_thread_enable_disable) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + + unsigned arena_ind = create_arena(); + + set_background_thread_enabled(false); + expect_purging(arena_ind, false); + + set_background_thread_enabled(true); + expect_purging(arena_ind, true); +} +TEST_END + +int +main(void) { + /* + * OK, this is a sort of nasty hack. We don't want to add *another* + * config option for HPA (the intent is that it becomes available on + * more platforms over time, and we're trying to prune back config + * options generally. But we'll get initialization errors on other + * platforms if we set hpa:true in the MALLOC_CONF (even if we set + * abort_conf:false as well). So we reach into the internals and set + * them directly, but only if we know that we're actually going to do + * something nontrivial in the tests. + */ + if (config_stats && hpa_supported() && have_background_thread) { + opt_hpa = true; + opt_background_thread = true; + } + return test_no_reentrancy( + test_hpa_background_thread_purges, + test_hpa_background_thread_enable_disable); +} diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh new file mode 100644 index 0000000..811da8b --- /dev/null +++ b/test/unit/hpa_background_thread.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0" + -- cgit v0.12 From 113938b6f43d528793e029d55ae51e21094b79bc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Jun 2021 14:18:08 -0700 Subject: HPA: Pull out a hooks type. For now, this is a no-op change. In a subsequent commit, it will be useful for testing. 
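
The payoff of the vtable is that a test can substitute hooks that count calls instead of touching real mappings. A trimmed-down sketch of that pattern (the struct mirrors a subset of the hpa_hooks_t fields added below; the counting harness, and the use of malloc/free as stand-ins for page mapping, are invented here for illustration):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct hooks_s {
	void *(*map)(size_t size);
	void (*unmap)(void *ptr, size_t size);
	void (*purge)(void *ptr, size_t size);
} hooks_t;

/* Test doubles: record activity rather than issuing real syscalls. */
static size_t npurges;

static void *
counting_map(size_t size) {
	return malloc(size);
}

static void
counting_unmap(void *ptr, size_t size) {
	(void)size;
	free(ptr);
}

static void
counting_purge(void *ptr, size_t size) {
	(void)ptr;
	(void)size;
	npurges++;
}

static const hooks_t counting_hooks = {
	&counting_map,
	&counting_unmap,
	&counting_purge,
};

int
main(void) {
	/*
	 * A consumer that would normally call the page-mapping layer directly
	 * goes through the hooks, so a test can assert on npurges afterwards.
	 */
	void *p = counting_hooks.map(4096);
	counting_hooks.purge(p, 4096);
	counting_hooks.unmap(p, 4096);
	printf("purge calls observed: %zu\n", npurges);
	return 0;
}
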
--- Makefile.in | 1 + include/jemalloc/internal/hpa.h | 12 +++++- include/jemalloc/internal/hpa_hooks.h | 15 +++++++ include/jemalloc/internal/pa.h | 4 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/arena.c | 4 +- src/hpa.c | 30 +++++--------- src/hpa_hooks.c | 46 ++++++++++++++++++++++ src/jemalloc.c | 2 +- src/pa.c | 5 ++- test/unit/hpa.c | 2 +- 14 files changed, 100 insertions(+), 29 deletions(-) create mode 100644 include/jemalloc/internal/hpa_hooks.h create mode 100644 src/hpa_hooks.c diff --git a/Makefile.in b/Makefile.in index 3e7d122..abdf800 100644 --- a/Makefile.in +++ b/Makefile.in @@ -122,6 +122,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ + $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 27adefc..3132a6f 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_HPA_H #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -57,6 +58,14 @@ struct hpa_shard_s { base_t *base; /* + * The HPA hooks for this shard. Eventually, once we have the + * hpa_central_t back, these should live there (since it doesn't make + * sense for different shards on the same hpa_central_t to have + * different hooks). + */ + hpa_hooks_t hooks; + + /* * This edata cache is the one we use when allocating a small extent * from a pageslab. The pageslab itself comes from the centralized * allocator, and so will use its edata_cache. @@ -109,7 +118,8 @@ struct hpa_shard_s { */ bool hpa_supported(); bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); + edata_cache_t *edata_cache, unsigned ind, const hpa_hooks_t *hooks, + const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h new file mode 100644 index 0000000..5c5b5f6 --- /dev/null +++ b/include/jemalloc/internal/hpa_hooks.h @@ -0,0 +1,15 @@ +#ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H +#define JEMALLOC_INTERNAL_HPA_HOOKS_H + +typedef struct hpa_hooks_s hpa_hooks_t; +struct hpa_hooks_s { + void *(*map)(size_t size); + void (*unmap)(void *ptr, size_t size); + void (*purge)(void *ptr, size_t size); + void (*hugify)(void *ptr, size_t size); + void (*dehugify)(void *ptr, size_t size); +}; + +extern hpa_hooks_t hpa_hooks_default; + +#endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0fb7725..582625b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,9 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); + const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, + const sec_opts_t *hpa_sec_opts); + /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a66ca36..f6fae7f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 0c8e6c7..800861d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 94fcd7b..3d3e717 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 0c8e6c7..800861d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index d6a1f67..5daeea3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1574,8 +1574,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, - &opt_hpa_sec_opts)) { + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, + &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } } diff --git a/src/hpa.c b/src/hpa.c index ee25e94..07ad117 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -52,7 +52,8 @@ hpa_supported() { bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts) { + edata_cache_t *edata_cache, unsigned ind, + const hpa_hooks_t *hooks, const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -69,6 +70,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, assert(edata_cache != NULL); shard->base = base; + shard->hooks = *hooks; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; @@ -251,20 +253,14 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { * allocate an edata_t for the new psset. */ if (shard->eden == NULL) { - /* - * During development, we're primarily concerned with systems - * with overcommit. Eventually, we should be more careful here. - */ - bool commit = true; /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, - &commit); + void *new_eden = shard->hooks.map(HPA_EDEN_SIZE); if (new_eden == NULL) { return NULL; } ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { - pages_unmap(new_eden, HPA_EDEN_SIZE); + shard->hooks.unmap(new_eden, HPA_EDEN_SIZE); return NULL; } shard->eden = new_eden; @@ -335,7 +331,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Actually do the purging, now that the lock is dropped. */ if (dehugify) { - pages_nohuge(hpdata_addr_get(to_purge), HUGEPAGE); + shard->hooks.dehugify(hpdata_addr_get(to_purge), HUGEPAGE); } size_t total_purged = 0; uint64_t purges_this_pass = 0; @@ -346,7 +342,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - pages_purge_forced(purge_addr, purge_size); + shard->hooks.purge(purge_addr, purge_size); } malloc_mutex_lock(tsdn, &shard->mtx); @@ -404,15 +400,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - bool err = pages_huge(hpdata_addr_get(to_hugify), - HUGEPAGE); - /* - * It's not clear what we could do in case of error; we - * might get into situations where we loop trying to - * hugify some page and failing over and over again. - * Just eat the error and pretend we were successful. - */ - (void)err; + shard->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -808,7 +796,7 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* There should be no allocations anywhere. */ assert(hpdata_empty(ps)); psset_remove(&shard->psset, ps); - pages_unmap(hpdata_addr_get(ps), HUGEPAGE); + shard->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c new file mode 100644 index 0000000..7e07c31 --- /dev/null +++ b/src/hpa_hooks.c @@ -0,0 +1,46 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_hooks.h" + +static void *hpa_hooks_map(size_t size); +static void hpa_hooks_unmap(void *ptr, size_t size); +static void hpa_hooks_purge(void *ptr, size_t size); +static void hpa_hooks_hugify(void *ptr, size_t size); +static void hpa_hooks_dehugify(void *ptr, size_t size); + +hpa_hooks_t hpa_hooks_default = { + &hpa_hooks_map, + &hpa_hooks_unmap, + &hpa_hooks_purge, + &hpa_hooks_hugify, + &hpa_hooks_dehugify, +}; + +static void * +hpa_hooks_map(size_t size) { + bool commit = true; + return pages_map(NULL, size, HUGEPAGE, &commit); +} + +static void +hpa_hooks_unmap(void *ptr, size_t size) { + pages_unmap(ptr, size); +} + +static void +hpa_hooks_purge(void *ptr, size_t size) { + pages_purge_forced(ptr, size); +} + +static void +hpa_hooks_hugify(void *ptr, size_t size) { + bool err = pages_huge(ptr, size); + (void)err; +} + +static void +hpa_hooks_dehugify(void *ptr, size_t size) { + bool err = pages_nohuge(ptr, size); + (void)err; +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 28c7fdc..5adb563 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1800,7 +1800,7 @@ malloc_init_hard_a0_locked() { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index cbc8f76..0172dfa 100644 --- a/src/pa.c +++ b/src/pa.c @@ 
-50,9 +50,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { + const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, + const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, hpa_opts)) { + &shard->edata_cache, shard->ind, hpa_hooks, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 4600983..0558680 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -42,7 +42,7 @@ create_test_data() { err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - &opts); + &hpa_hooks_default, &opts); assert_false(err, ""); return (hpa_shard_t *)test_data; -- cgit v0.12 From 6630c5989672cbbd5ec2369aaa46ce6f5ce1ed4e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Jun 2021 14:53:23 -0700 Subject: HPA: Hugification hysteresis. We wait a while after deciding a huge extent should get hugified to see if it gets purged before long. This avoids hugifying extents that might shortly get dehugified for purging. Rename and use the hpa_dehugification_threshold option support code for this, since it's now ignored. --- include/jemalloc/internal/hpa_hooks.h | 1 + include/jemalloc/internal/hpa_opts.h | 19 +++-- include/jemalloc/internal/hpdata.h | 23 ++++-- src/ctl.c | 9 +-- src/hpa.c | 61 ++++++++++++--- src/hpa_hooks.c | 7 ++ src/jemalloc.c | 24 ++---- src/stats.c | 7 +- test/unit/hpa.c | 143 +++++++++++++++++++++++++++++++--- 9 files changed, 234 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 5c5b5f6..3e21d85 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -8,6 +8,7 @@ struct hpa_hooks_s { void (*purge)(void *ptr, size_t size); void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); + void (*curtime)(nstime_t *r_time); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ef16219..2548f44 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -17,16 +17,13 @@ struct hpa_shard_opts_s { * any allocation request. */ size_t slab_max_alloc; + /* * When the number of active bytes in a hugepage is >= * hugification_threshold, we force hugify it. */ size_t hugification_threshold; - /* - * When the number of dirty bytes in a hugepage is >= - * dehugification_threshold, we force dehugify it. - */ - size_t dehugification_threshold; + /* * The HPA purges whenever the number of pages exceeds dirty_mult * * active_pages. This may be set to (fxp_t)-1 to disable purging. @@ -40,6 +37,12 @@ struct hpa_shard_opts_s { * ourselves for encapsulation purposes. */ bool deferral_allowed; + + /* + * How long a hugepage has to be a hugification candidate before it will + * actually get hugified. 
+ */ + uint64_t hugify_delay_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -47,8 +50,6 @@ struct hpa_shard_opts_s { 64 * 1024, \ /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ - /* dehugification_threshold */ \ - HUGEPAGE * 20 / 100, \ /* dirty_mult */ \ FXP_INIT_PERCENT(25), \ /* \ @@ -58,7 +59,9 @@ struct hpa_shard_opts_s { * or by an hpa_shard_set_deferral_allowed call, so the value \ * we put here doesn't matter. \ */ \ - false \ + false, \ + /* hugify_delay_ms */ \ + 10 * 1000 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 32e2624..2a12add 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -61,6 +61,8 @@ struct hpdata_s { /* And with hugifying. */ bool h_hugify_allowed; + /* When we became a hugification candidate. */ + nstime_t h_time_hugify_allowed; bool h_in_psset_hugify_container; /* Whether or not a purge or hugify is currently happening. */ @@ -175,8 +177,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -185,9 +187,20 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) { } static inline void -hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { - assert(hugify_allowed == false || !hpdata->h_mid_hugify); - hpdata->h_hugify_allowed = hugify_allowed; +hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) { + assert(!hpdata->h_mid_hugify); + hpdata->h_hugify_allowed = true; + hpdata->h_time_hugify_allowed = now; +} + +static inline nstime_t +hpdata_time_hugify_allowed(hpdata_t *hpdata) { + return hpdata->h_time_hugify_allowed; +} + +static inline void +hpdata_disallow_hugify(hpdata_t *hpdata) { + hpdata->h_hugify_allowed = false; } static inline bool diff --git a/src/ctl.c b/src/ctl.c index c66b4d8..b3e62df 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,7 +96,7 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) -CTL_PROTO(opt_hpa_dehugification_threshold) +CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -406,8 +406,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, - {NAME("hpa_dehugification_threshold"), - CTL(opt_hpa_dehugification_threshold)}, + {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2114,8 +2113,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) -CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, - opt_hpa_opts.dehugification_threshold, size_t) +CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) + /* * This will have to change before we publicly document this option; fxp_t and * its representation are internal implementation details. 
diff --git a/src/hpa.c b/src/hpa.c index 07ad117..4ae30b9 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -198,7 +198,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { hpdata_purge_allowed_set(ps, false); - hpdata_hugify_allowed_set(ps, false); + hpdata_disallow_hugify(ps); return; } /* @@ -226,7 +226,24 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { - hpdata_hugify_allowed_set(ps, true); + nstime_t now; + shard->hooks.curtime(&now); + hpdata_allow_hugify(ps, now); + } + /* + * Once a hugepage has become eligible for hugification, we don't mark + * it as ineligible just because it stops meeting the criteria (this + * could lead to situations where a hugepage that spends most of its + * time meeting the criteria never quite getting hugified if there are + * intervening deallocations). The idea is that the hugification delay + * will allow them to get purged, reseting their "hugify-allowed" bit. + * If they don't get purged, then the hugification isn't hurting and + * might help. As an exception, we don't hugify hugepages that are now + * empty; it definitely doesn't help there until the hugepage gets + * reused, which is likely not for a while. + */ + if (hpdata_nactive_get(ps) == 0) { + hpdata_disallow_hugify(ps); } } @@ -309,7 +326,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); hpdata_purge_allowed_set(to_purge, false); - hpdata_hugify_allowed_set(to_purge, false); + hpdata_disallow_hugify(to_purge); /* * Unlike with hugification (where concurrent * allocations are allowed), concurrent allocation out @@ -386,6 +403,16 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { assert(hpdata_hugify_allowed_get(to_hugify)); assert(!hpdata_changing_state_get(to_hugify)); + /* Make sure that it's been hugifiable for long enough. */ + nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); + nstime_t nstime; + shard->hooks.curtime(&nstime); + nstime_subtract(&nstime, &time_hugify_allowed); + uint64_t millis = nstime_msec(&nstime); + if (millis < shard->opts.hugify_delay_ms) { + return false; + } + /* * Don't let anyone else purge or hugify this page while * we're hugifying it (allocations and deallocations are @@ -394,7 +421,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); hpdata_purge_allowed_set(to_hugify, false); - hpdata_hugify_allowed_set(to_hugify, false); + hpdata_disallow_hugify(to_hugify); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); @@ -421,9 +448,6 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { - bool hugified; - bool purged; - size_t nloop = 0; malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!forced && shard->opts.deferral_allowed) { return; @@ -433,16 +457,29 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * be done. Otherwise, bound latency to not be *too* bad by doing at * most a small fixed number of operations. */ - size_t maxloops = (forced ? (size_t)-1 : 8); + bool hugified = false; + bool purged = false; + size_t max_ops = (forced ? 
(size_t)-1 : 16); + size_t nops = 0; do { - hugified = hpa_try_hugify(tsdn, shard); - malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * Always purge before hugifying, to make sure we get some + * ability to hit our quiescence targets. + */ purged = false; - if (hpa_should_purge(tsdn, shard)) { + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { purged = hpa_try_purge(tsdn, shard); + if (purged) { + nops++; + } } + hugified = hpa_try_hugify(tsdn, shard); + if (hugified) { + nops++; + } + malloc_mutex_assert_owner(tsdn, &shard->mtx); malloc_mutex_assert_owner(tsdn, &shard->mtx); - } while ((hugified || purged) && nloop++ < maxloops); + } while ((hugified || purged) && nops < max_ops); } static edata_t * diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 7e07c31..6f37761 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,6 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); +static void hpa_hooks_curtime(nstime_t *r_nstime); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -15,6 +16,7 @@ hpa_hooks_t hpa_hooks_default = { &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, }; static void * @@ -44,3 +46,8 @@ hpa_hooks_dehugify(void *ptr, size_t size) { bool err = pages_nohuge(ptr, size); (void)err; } + +static void +hpa_hooks_curtime(nstime_t *r_nstime) { + nstime_update(r_nstime); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 5adb563..71efcb6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1145,6 +1145,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ CONF_CHECK_MIN, CONF_CHECK_MAX, false) @@ -1441,26 +1444,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - /* And the same for the dehugification_threhsold. 
*/ CONF_HANDLE_SIZE_T( - opt_hpa_opts.dehugification_threshold, - "hpa_dehugification_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_dehugification_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, - &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } else { - opt_hpa_opts.dehugification_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } + opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", + 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { diff --git a/src/stats.c b/src/stats.c index 4e6c392..16aa3fd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1376,7 +1376,7 @@ stats_general_print(emitter_t *emitter) { uint64_t u64v; int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz; + size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); @@ -1385,6 +1385,7 @@ stats_general_print(emitter_t *emitter) { cpsz = sizeof(const char *); u32sz = sizeof(uint32_t); i64sz = sizeof(int64_t); + u64sz = sizeof(uint64_t); CTL_GET("version", &cpv, const char *); emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); @@ -1442,6 +1443,8 @@ stats_general_print(emitter_t *emitter) { #define OPT_WRITE_INT64(name) \ OPT_WRITE(name, i64v, i64sz, emitter_type_int64) +#define OPT_WRITE_UINT64(name) \ + OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) #define OPT_WRITE_SIZE_T(name) \ OPT_WRITE(name, sv, ssz, emitter_type_size) @@ -1468,7 +1471,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") - OPT_WRITE_SIZE_T("hpa_dehugification_threshold") + OPT_WRITE_UINT64("hpa_hugify_delay_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 0558680..a9e551f 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -19,8 +19,21 @@ struct test_data_s { emap_t emap; }; +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, +}; + static hpa_shard_t * -create_test_data() { +create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks); @@ -37,12 +50,9 @@ create_test_data() { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT; - opts.slab_max_alloc = ALLOC_MAX; - err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - &hpa_hooks_default, &opts); + hooks, opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -58,7 +68,8 @@ destroy_test_data(hpa_shard_t *shard) { TEST_BEGIN(test_alloc_max) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edata; @@ -134,7 +145,8 @@ node_remove(mem_tree_t *tree, edata_t *edata) { TEST_BEGIN(test_stress) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = 
create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); @@ -224,7 +236,8 @@ expect_contiguous(edata_t **edatas, size_t nedatas) { TEST_BEGIN(test_alloc_dalloc_batch) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); enum {NALLOCS = 8}; @@ -282,6 +295,117 @@ TEST_BEGIN(test_alloc_dalloc_batch) { } TEST_END +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static bool defer_purge_called = false; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + defer_purge_called = true; +} + +static bool defer_hugify_called = false; +static void +defer_test_hugify(void *ptr, size_t size) { + defer_hugify_called = true; +} + +static bool defer_dehugify_called = false; +static void +defer_test_dehugify(void *ptr, size_t size) { + defer_dehugify_called = true; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time) { + *r_time = defer_curtime; +} + +TEST_BEGIN(test_defer_time) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edatas[HUGEPAGE_PAGES]; + for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_false(defer_hugify_called, "Hugified too early"); + + /* Hugification delay is set to 10 seconds in options. */ + nstime_init2(&defer_curtime, 11, 0); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(defer_hugify_called, "Failed to hugify"); + + defer_hugify_called = false; + + /* Purge. Recall that dirty_mult is .25. */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i]); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_hugify_called, "Hugified too early"); + expect_true(defer_dehugify_called, "Should have dehugified"); + expect_true(defer_purge_called, "Should have purged"); + defer_hugify_called = false; + defer_dehugify_called = false; + defer_purge_called = false; + + /* + * Refill the page. We now meet the hugification threshold; we should + * be marked for pending hugify. + */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* + * We would be ineligible for hugification, had we not already met the + * threshold before dipping below it. + */ + pai_dalloc(tsdn, &shard->pai, edatas[0]); + /* Wait for the threshold again. 
*/ + nstime_init2(&defer_curtime, 22, 0); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(defer_hugify_called, "Hugified too early"); + expect_false(defer_dehugify_called, "Unexpected dehugify"); + expect_false(defer_purge_called, "Unexpected purge"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -299,5 +423,6 @@ main(void) { return test_no_reentrancy( test_alloc_max, test_stress, - test_alloc_dalloc_batch); + test_alloc_dalloc_batch, + test_defer_time); } -- cgit v0.12 From 4b633b9a81bb0fe1b234bd6243496d407cae8665 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 16 Jul 2021 14:53:25 -0700 Subject: Clean up background thread sleep computation Isolate the computation of purge interval from background thread logic and move into more suitable file. --- include/jemalloc/internal/decay.h | 11 +++ src/background_thread.c | 179 +++++++++++--------------------------- src/decay.c | 73 ++++++++++++++++ 3 files changed, 133 insertions(+), 130 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index df39665..a81e392 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -3,6 +3,8 @@ #include "jemalloc/internal/smoothstep.h" +#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) + /* * The decay_t computes the number of pages we should purge at any given time. * Page allocators inform a decay object when pages enter a decay-able state @@ -146,4 +148,13 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); +/* + * Calculates wait time until at least npages_threshold pages should be purged. + * + * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of + * indefinite wait. + */ +uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, + uint64_t npages_threshold); + #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/src/background_thread.c b/src/background_thread.c index 1fb24fe..4951cd1 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -104,134 +104,6 @@ set_current_thread_affinity(int cpu) { /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) -static inline size_t -decay_npurge_after_interval(decay_t *decay, size_t interval) { - size_t i; - uint64_t sum = 0; - for (i = 0; i < interval; i++) { - sum += decay->backlog[i] * h_steps[i]; - } - for (; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); - } - - return (size_t)(sum >> SMOOTHSTEP_BFP); -} - -static uint64_t -arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, - ecache_t *ecache) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_MIN_INTERVAL_NS; - } - - uint64_t interval; - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. */ - interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; - goto label_done; - } - - uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); - assert(decay_interval_ns > 0); - size_t npages = ecache_npages_get(ecache); - if (npages == 0) { - unsigned i; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - if (decay->backlog[i] > 0) { - break; - } - } - if (i == SMOOTHSTEP_NSTEPS) { - /* No dirty pages recorded. Sleep indefinitely. 
*/ - interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; - goto label_done; - } - } - if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) { - /* Use max interval. */ - interval = decay_interval_ns * SMOOTHSTEP_NSTEPS; - goto label_done; - } - - size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns; - size_t ub = SMOOTHSTEP_NSTEPS; - /* Minimal 2 intervals to ensure reaching next epoch deadline. */ - lb = (lb < 2) ? 2 : lb; - if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) || - (lb + 2 > ub)) { - interval = BACKGROUND_THREAD_MIN_INTERVAL_NS; - goto label_done; - } - - assert(lb + 2 <= ub); - size_t npurge_lb, npurge_ub; - npurge_lb = decay_npurge_after_interval(decay, lb); - if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - interval = decay_interval_ns * lb; - goto label_done; - } - npurge_ub = decay_npurge_after_interval(decay, ub); - if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) { - interval = decay_interval_ns * ub; - goto label_done; - } - - unsigned n_search = 0; - size_t target, npurge; - while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub) - && (lb + 2 < ub)) { - target = (lb + ub) / 2; - npurge = decay_npurge_after_interval(decay, target); - if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - ub = target; - npurge_ub = npurge; - } else { - lb = target; - npurge_lb = npurge; - } - assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); - ++n_search; - } - interval = decay_interval_ns * (ub + lb) / 2; -label_done: - interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? - BACKGROUND_THREAD_MIN_INTERVAL_NS : interval; - malloc_mutex_unlock(tsdn, &decay->mtx); - - return interval; -} - -/* Compute purge interval for background threads. */ -static uint64_t -arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { - uint64_t i1, i2; - i1 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.ecache_dirty); - if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { - return i1; - } - i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); - - uint64_t min_so_far = i1 < i2 ? i1 : i2; - if (opt_background_thread_hpa_interval_max_ms >= 0) { - uint64_t hpa_interval = 1000 * 1000 * - (uint64_t)opt_background_thread_hpa_interval_max_ms; - if (hpa_interval < min_so_far) { - if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; - } else { - min_so_far = hpa_interval; - } - } - } - - return min_so_far; -} - static void background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { @@ -301,6 +173,52 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { return false; } +static inline uint64_t +arena_decay_compute_purge_interval(tsdn_t *tsdn, decay_t *decay, + size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_MIN_INTERVAL_NS; + } + uint64_t decay_ns = decay_ns_until_purge(decay, npages, + BACKGROUND_THREAD_NPAGES_THRESHOLD); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return decay_ns < BACKGROUND_THREAD_MIN_INTERVAL_NS ? 
+ BACKGROUND_THREAD_MIN_INTERVAL_NS : + decay_ns; +} + + +static inline uint64_t +arena_decay_compute_min_purge_interval(tsdn_t *tsdn, arena_t *arena) { + uint64_t dirty, muzzy; + dirty = arena_decay_compute_purge_interval(tsdn, + &arena->pa_shard.pac.decay_dirty, + ecache_npages_get(&arena->pa_shard.pac.ecache_dirty)); + if (dirty == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return dirty; + } + muzzy = arena_decay_compute_purge_interval(tsdn, + &arena->pa_shard.pac.decay_muzzy, + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy)); + + uint64_t min_so_far = dirty < muzzy ? dirty : muzzy; + if (opt_background_thread_hpa_interval_max_ms >= 0) { + uint64_t hpa_interval = 1000 * 1000 * + (uint64_t)opt_background_thread_hpa_interval_max_ms; + if (hpa_interval < min_so_far) { + if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; + } else { + min_so_far = hpa_interval; + } + } + } + + return min_so_far; +} + static inline void background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; @@ -316,10 +234,11 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne /* Min interval will be used. */ continue; } - uint64_t interval = arena_decay_compute_purge_interval(tsdn, + uint64_t interval = arena_decay_compute_min_purge_interval(tsdn, arena); assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); - if (min_interval > interval) { + if (interval != DECAY_UNBOUNDED_TIME_TO_PURGE && + min_interval > interval) { min_interval = interval; } } diff --git a/src/decay.c b/src/decay.c index 23d59da..87e3a8b 100644 --- a/src/decay.c +++ b/src/decay.c @@ -175,3 +175,76 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, return true; } + +static inline size_t +decay_npurge_after_interval(decay_t *decay, size_t interval) { + size_t i; + uint64_t sum = 0; + for (i = 0; i < interval; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + for (; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * + (h_steps[i] - h_steps[i - interval]); + } + + return (size_t)(sum >> SMOOTHSTEP_BFP); +} + +uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, + uint64_t npages_threshold) { + ssize_t decay_time = decay_ms_read(decay); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + return DECAY_UNBOUNDED_TIME_TO_PURGE; + } + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + assert(decay_interval_ns > 0); + if (npages_current == 0) { + unsigned i; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + if (decay->backlog[i] > 0) { + break; + } + } + if (i == SMOOTHSTEP_NSTEPS) { + /* No dirty pages recorded. Sleep indefinitely. */ + return DECAY_UNBOUNDED_TIME_TO_PURGE; + } + } + if (npages_current <= npages_threshold) { + /* Use max interval. */ + return decay_interval_ns * SMOOTHSTEP_NSTEPS; + } + + /* Minimal 2 intervals to ensure reaching next epoch deadline. 
*/ + size_t lb = 2; + size_t ub = SMOOTHSTEP_NSTEPS; + + size_t npurge_lb, npurge_ub; + npurge_lb = decay_npurge_after_interval(decay, lb); + if (npurge_lb > npages_threshold) { + return decay_interval_ns * lb; + } + npurge_ub = decay_npurge_after_interval(decay, ub); + if (npurge_ub < npages_threshold) { + return decay_interval_ns * ub; + } + + unsigned n_search = 0; + size_t target, npurge; + while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) { + target = (lb + ub) / 2; + npurge = decay_npurge_after_interval(decay, target); + if (npurge > npages_threshold) { + ub = target; + npurge_ub = npurge; + } else { + lb = target; + npurge_lb = npurge; + } + assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + ++n_search; + } + return decay_interval_ns * (ub + lb) / 2; +} -- cgit v0.12 From aaea4fd1e640690042b34755fd5e4714ebd0459b Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 20 Jul 2021 13:22:05 -0700 Subject: Add more documentation to decay.c It took me a while to understand why some things are implemented the way they are, so hopefully it will help future readers. --- include/jemalloc/internal/decay.h | 3 ++- src/decay.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index a81e392..8e51745 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -149,7 +149,8 @@ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); /* - * Calculates wait time until at least npages_threshold pages should be purged. + * Calculates wait time until a number of pages in the interval + * [0.5 * npages_threshold .. 1.5 * npages_threshold] should be purged. * * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of * indefinite wait. diff --git a/src/decay.c b/src/decay.c index 87e3a8b..fdbd63d 100644 --- a/src/decay.c +++ b/src/decay.c @@ -102,6 +102,11 @@ decay_backlog_npages_limit(const decay_t *decay) { return npages_limit_backlog; } +/* + * Update backlog, assuming that 'nadvance_u64' time intervals have passed. + * Trailing 'nadvance_u64' records should be erased and 'current_npages' is + * placed as the newest record. + */ static void decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { @@ -176,6 +181,22 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, return true; } +/* + * Calculate how many pages should be purged after 'interval'. + * + * First, calculate how many pages should remain at the moment, then subtract + * the number of pages that should remain after 'interval'. The difference is + * how many pages should be purged until then. + * + * The number of pages that should remain at a specific moment is calculated + * like this: pages(now) = sum(backlog[i] * h_steps[i]). After 'interval' + * passes, backlog would shift 'interval' positions to the left and sigmoid + * curve would be applied starting with backlog[interval]. + * + * The implementation doesn't directly map to the description, but it's + * essentially the same calculation, optimized to avoid iterating over + * [interval..SMOOTHSTEP_NSTEPS) twice. 
+ */ static inline size_t decay_npurge_after_interval(decay_t *decay, size_t interval) { size_t i; -- cgit v0.12 From c88fe355e64fa18eef932b4446aae7296babcc06 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 21 Jul 2021 14:45:55 -0700 Subject: Add unit tests for decay After slight changes in the interface, it's an opportunity to enhance unit tests. --- test/unit/decay.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 214 insertions(+), 8 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 9da0d94..72484c8 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,10 +2,68 @@ #include "jemalloc/internal/decay.h" -/* - * Honestly, this is mostly a stub for now. Eventually, we should beef up - * testing here. - */ +TEST_BEGIN(test_decay_init) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + ssize_t decay_ms = 1000; + assert_true(decay_ms_valid(decay_ms), ""); + + expect_false(decay_init(&decay, &curtime, decay_ms), + "Failed to initialize decay"); + expect_zd_eq(decay_ms_read(&decay), decay_ms, + "Decay_ms was initialized incorrectly"); + expect_u64_ne(decay_epoch_duration_ns(&decay), 0, + "Epoch duration was initialized incorrectly"); +} +TEST_END + +TEST_BEGIN(test_decay_ms_valid) { + expect_false(decay_ms_valid(-7), + "Misclassified negative decay as valid"); + expect_true(decay_ms_valid(-1), + "Misclassified -1 (never decay) as invalid decay"); + expect_true(decay_ms_valid(8943), + "Misclassified valid decay"); + if (SSIZE_MAX > NSTIME_SEC_MAX) { + expect_false( + decay_ms_valid((ssize_t)(NSTIME_SEC_MAX * KQU(1000) + 39)), + "Misclassified too large decay"); + } +} +TEST_END + +TEST_BEGIN(test_decay_maybe_advance_epoch) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + expect_false(err, ""); + + bool advanced; + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_false(advanced, "Epoch advanced while time didn't"); + + nstime_t interval; + nstime_init(&interval, decay_epoch_duration_ns(&decay)); + + nstime_add(&curtime, &interval); + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_false(advanced, "Epoch advanced after first interval"); + + nstime_add(&curtime, &interval); + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_true(advanced, "Epoch didn't advance after two intervals"); +} +TEST_END TEST_BEGIN(test_decay_empty) { /* If we never have any decaying pages, npages_limit should be 0. */ @@ -30,16 +88,164 @@ TEST_BEGIN(test_decay_empty) { &curtime, dirty_pages); if (epoch_advanced) { nepochs++; - assert_zu_eq(decay_npages_limit_get(&decay), 0, - "Should not increase the limit arbitrarily"); + expect_zu_eq(decay_npages_limit_get(&decay), 0, + "Unexpectedly increased npages_limit"); } } - assert_d_gt(nepochs, 0, "Should have advanced epochs"); + expect_d_gt(nepochs, 0, "Epochs never advanced"); +} +TEST_END + +/* + * Verify that npages_limit correctly decays as the time goes. + * + * During first 'nepoch_init' epochs, add new dirty pages. + * After that, let them decay and verify npages_limit decreases. + * Then proceed with another 'nepoch_init' epochs and check that + * all dirty pages are flushed out of backlog, bringing npages_limit + * down to zero. 
+ */ +TEST_BEGIN(test_decay) { + const uint64_t nepoch_init = 10; + + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + expect_zu_eq(decay_npages_limit_get(&decay), 0, + "Empty decay returned nonzero npages_limit"); + + nstime_t epochtime; + nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); + + const size_t dirty_pages_per_epoch = 1000; + size_t dirty_pages = 0; + uint64_t epoch_ns = decay_epoch_duration_ns(&decay); + bool epoch_advanced = false; + + /* Populate backlog with some dirty pages */ + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + dirty_pages += dirty_pages_per_epoch; + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + size_t npages_limit = decay_npages_limit_get(&decay); + expect_zu_gt(npages_limit, 0, "npages_limit is incorrectly equal " + "to zero after dirty pages have been added"); + + /* Keep dirty pages unchanged and verify that npages_limit decreases */ + for (uint64_t i = nepoch_init; i * epoch_ns < decay_ns; ++i) { + nstime_add(&curtime, &epochtime); + epoch_advanced = decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + if (epoch_advanced) { + size_t npages_limit_new = decay_npages_limit_get(&decay); + expect_zu_lt(npages_limit_new, npages_limit, + "napges_limit failed to decay"); + + npages_limit = npages_limit_new; + } + } + + expect_zu_gt(npages_limit, 0, "npages_limit decayed to zero earlier " + "than decay_ms since last dirty page was added"); + + /* Completely push all dirty pages out of the backlog */ + epoch_advanced = false; + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + npages_limit = decay_npages_limit_get(&decay); + expect_zu_eq(npages_limit, 0, "npages_limit didn't decay to 0 after " + "decay_ms since last bump in dirty pages"); +} +TEST_END + +TEST_BEGIN(test_decay_ns_until_purge) { + const uint64_t nepoch_init = 10; + + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + nstime_t epochtime; + nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); + + uint64_t ns_until_purge_empty = decay_ns_until_purge(&decay, 0, 0); + expect_u64_eq(ns_until_purge_empty, DECAY_UNBOUNDED_TIME_TO_PURGE, + "Failed to return unbounded wait time for zero threshold"); + + const size_t dirty_pages_per_epoch = 1000; + size_t dirty_pages = 0; + bool epoch_advanced = false; + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + dirty_pages += dirty_pages_per_epoch; + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + uint64_t ns_until_purge_all = decay_ns_until_purge(&decay, + dirty_pages, dirty_pages); + expect_u64_ge(ns_until_purge_all, decay_ns, + "Incorrectly calculated time to purge all pages"); + + uint64_t ns_until_purge_none = decay_ns_until_purge(&decay, + dirty_pages, 0); + 
expect_u64_eq(ns_until_purge_none, decay_epoch_duration_ns(&decay) * 2, + "Incorrectly calculated time to purge 0 pages"); + + uint64_t npages_threshold = dirty_pages / 2; + uint64_t ns_until_purge_half = decay_ns_until_purge(&decay, + dirty_pages, npages_threshold); + + nstime_t waittime; + nstime_init(&waittime, ns_until_purge_half); + nstime_add(&curtime, &waittime); + + decay_maybe_advance_epoch(&decay, &curtime, dirty_pages); + size_t npages_limit = decay_npages_limit_get(&decay); + expect_zu_lt(npages_limit, dirty_pages, + "npages_limit failed to decrease after waiting"); + size_t expected = dirty_pages - npages_limit; + int deviation = abs((int)expected - (int)(npages_threshold)); + expect_d_lt(deviation, (int)(npages_threshold / 2), + "After waiting, number of pages is out of the expected interval " + "[0.5 * npages_threshold .. 1.5 * npages_threshold]"); } TEST_END int main(void) { return test( - test_decay_empty); + test_decay_init, + test_decay_ms_valid, + test_decay_maybe_advance_epoch, + test_decay_empty, + test_decay, + test_decay_ns_until_purge); } -- cgit v0.12 From e09eac1d4e9df2e889417e1cd3e56b451b959ba8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 May 2021 13:47:01 -0700 Subject: Remove hpa_central. This is now dead code. --- Makefile.in | 2 - include/jemalloc/internal/hpa_central.h | 47 --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/hpa_central.c | 192 --------- test/unit/hpa_central.c | 450 --------------------- 8 files changed, 699 deletions(-) delete mode 100644 include/jemalloc/internal/hpa_central.h delete mode 100644 src/hpa_central.c delete mode 100644 test/unit/hpa_central.c diff --git a/Makefile.in b/Makefile.in index abdf800..286f7ea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -121,7 +121,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/fxp.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ - $(srcroot)src/hpa_central.c \ $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ @@ -223,7 +222,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ - $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h deleted file mode 100644 index 8659f71..0000000 --- a/include/jemalloc/internal/hpa_central.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H -#define JEMALLOC_INTERNAL_HPA_CENTRAL_H - -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/emap.h" - -typedef struct hpa_central_s hpa_central_t; -struct hpa_central_s { - /* The emap we use for metadata operations. */ - emap_t *emap; - - edata_cache_small_t ecs; - eset_t eset; - - size_t sn_next; -}; - -void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, - emap_t *emap); -/* - * Tries to satisfy the given allocation request with an extent already given to - * central. - */ -edata_t *hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, - size_t size_min, size_t size_goal); -/* - * Adds the given edata to the central allocator as a new allocation. 
The - * intent is that after a reuse attempt fails, the caller can allocate a new - * extent using whatever growth policy it prefers and allocate from that, giving - * the excess to the hpa_central_t (this is analogous to the - * extent_grow_retained functionality; we can allocate address space in - * exponentially growing chunks). - * - * The edata_t should come from the same base that this hpa was initialized - * with. Only complete extents should be added (i.e. those for which the head - * bit is true, and for which their successor is either not owned by jemalloc - * or also has a head bit of true). It should be active, large enough to - * satisfy the requested allocation, and not already in the emap. - * - * If this returns true, then we did not accept the extent, and took no action. - * Otherwise, modifies *edata to satisfy the allocation. - */ -bool hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, - size_t size, edata_t *to_add); -void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata); - -#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f6fae7f..597b247 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -61,7 +61,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 800861d..d063a01 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 3d3e717..46633e8 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -61,7 +61,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 800861d..d063a01 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/src/hpa_central.c b/src/hpa_central.c deleted file mode 100644 index 9e00dd6..0000000 --- a/src/hpa_central.c +++ /dev/null @@ -1,192 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/hpa_central.h" - -void -hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, - emap_t *emap) { - central->emap = emap; - edata_cache_small_init(¢ral->ecs, edata_cache); - eset_init(¢ral->eset, extent_state_dirty); - central->sn_next = 0; -} - -/* - * Returns the trail, or NULL in case of failure (which can only occur in case - * of an emap operation failure; i.e. OOM). 
- */ -static edata_t * -hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, - size_t size) { - edata_t *trail = edata_cache_small_get(tsdn, ¢ral->ecs); - if (trail == NULL) { - return NULL; - } - size_t cursize = edata_size_get(edata); - edata_init(trail, edata_arena_ind_get(edata), - (void *)((uintptr_t)edata_base_get(edata) + size), cursize - size, - /* slab */ false, SC_NSIZES, edata_sn_get(edata), - edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), EXTENT_PAI_HPA, EXTENT_NOT_HEAD); - - emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, - size, trail, cursize - size); - assert(edata_state_get(edata) == edata_state_get(trail)); - if (err) { - edata_cache_small_put(tsdn, ¢ral->ecs, trail); - return NULL; - } - assert(edata_state_get(edata) == edata_state_get(trail)); - - edata_size_set(edata, size); - emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, - cursize - size); - - return trail; -} - -edata_t * -hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, - size_t size_min, size_t size_goal) { - assert((size_min & PAGE_MASK) == 0); - assert((size_goal & PAGE_MASK) == 0); - - /* - * Fragmentation avoidance is more important in the HPA than giving the - * user their preferred amount of space, since we expect the average - * unused extent to be more costly (PAC extents can get purged away - * easily at any granularity; HPA extents are much more difficult to - * purge away if they get stranded). So we always search for the - * earliest (in first-fit ordering) extent that can satisfy the request, - * and use it, regardless of the goal size. - */ - edata_t *edata = eset_fit(¢ral->eset, size_min, PAGE, - /* exact_only */ false, /* lg_max_fit */ SC_PTR_BITS); - if (edata == NULL) { - return NULL; - } - - eset_remove(¢ral->eset, edata); - /* Maybe the first fit is also under the limit. */ - if (edata_size_get(edata) <= size_goal) { - goto label_success; - } - - /* Otherwise, split. */ - edata_t *trail = hpa_central_split(tsdn, central, edata, size_goal); - if (trail == NULL) { - eset_insert(¢ral->eset, edata); - return NULL; - } - emap_assert_mapped(tsdn, central->emap, trail); - eset_insert(¢ral->eset, trail); - -label_success: - emap_assert_mapped(tsdn, central->emap, edata); - assert(edata_size_get(edata) >= size_min); - /* - * We don't yet support purging in the hpa_central; everything should be - * dirty. - */ - assert(edata_state_get(edata) == extent_state_dirty); - assert(edata_base_get(edata) == edata_addr_get(edata)); - emap_update_edata_state(tsdn, central->emap, edata, - extent_state_active); - return edata; -} - -bool -hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, - size_t size, edata_t *edata) { - assert((size & PAGE_MASK) == 0); - assert(edata_base_get(edata) == edata_addr_get(edata)); - assert(edata_size_get(edata) >= size); - assert(edata_arena_ind_get(edata) - == base_ind_get(central->ecs.fallback->base)); - assert(edata_is_head_get(edata)); - assert(edata_state_get(edata) == extent_state_active); - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_slab_get(edata) == false); - assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - - /* edata should be a new alloc, and hence not already mapped. 
*/ - emap_assert_not_mapped(tsdn, central->emap, edata); - - size_t cursize = edata_size_get(edata); - - bool err = emap_register_boundary(tsdn, central->emap, edata, SC_NSIZES, - /* slab */ false); - if (err) { - return true; - } - /* No splitting is necessary. */ - if (cursize == size) { - size_t sn = central->sn_next++; - edata_sn_set(edata, sn); - return false; - } - - /* We should split. */ - edata_t *trail = hpa_central_split(tsdn, central, edata, size); - if (trail == NULL) { - emap_deregister_boundary(tsdn, central->emap, NULL); - return true; - } - size_t sn = central->sn_next++; - edata_sn_set(edata, sn); - edata_sn_set(trail, sn); - - emap_update_edata_state(tsdn, central->emap, trail, extent_state_dirty); - eset_insert(¢ral->eset, trail); - return false; -} - -/* Merges b into a, freeing b back to the edata cache.. */ -static void -hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, - edata_t *b) { - assert(emap_edata_is_acquired(tsdn, central->emap, a)); - assert(emap_edata_is_acquired(tsdn, central->emap, b)); - - emap_prepare_t prepare; - emap_merge_prepare(tsdn, central->emap, &prepare, a, b); - edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - emap_merge_commit(tsdn, central->emap, &prepare, a, b); - edata_cache_small_put(tsdn, ¢ral->ecs, b); -} - -void -hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { - assert(edata_state_get(edata) == extent_state_active); - assert(edata_ps_get(edata) == NULL); - - /* - * These should really be called at the pa interface level, but - * currently they're not. - */ - edata_addr_set(edata, edata_base_get(edata)); - edata_zeroed_set(edata, false); - - /* - * Merge forward first, so that the original *edata stays active state - * for the second acquire (only necessary for sanity checking). 
- */ - edata_t *trail = emap_try_acquire_edata_neighbor(tsdn, central->emap, - edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ true); - if (trail != NULL) { - eset_remove(¢ral->eset, trail); - hpa_central_dalloc_merge(tsdn, central, edata, trail); - } - edata_t *lead = emap_try_acquire_edata_neighbor(tsdn, central->emap, - edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ false); - if (lead != NULL) { - eset_remove(¢ral->eset, lead); - hpa_central_dalloc_merge(tsdn, central, lead, edata); - edata = lead; - } - - emap_update_edata_state(tsdn, central->emap, edata, extent_state_dirty); - eset_insert(¢ral->eset, edata); -} diff --git a/test/unit/hpa_central.c b/test/unit/hpa_central.c deleted file mode 100644 index f90b6e3..0000000 --- a/test/unit/hpa_central.c +++ /dev/null @@ -1,450 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/hpa_central.h" - -typedef struct test_data_s test_data_t; -struct test_data_s { - /* - * Must be the first member -- we convert back and forth between the - * test_data_t and the hpa_central_t; - */ - hpa_central_t central; - base_t *base; - edata_cache_t edata_cache; - emap_t emap; -}; - -void -create_test_data(hpa_central_t **r_central, base_t **r_base) { - bool err; - base_t *base = base_new(TSDN_NULL, /* ind */ 111, - &ehooks_default_extent_hooks); - assert_ptr_not_null(base, ""); - - test_data_t *test_data = malloc(sizeof(test_data_t)); - assert_ptr_not_null(test_data, ""); - - test_data->base = base; - - err = edata_cache_init(&test_data->edata_cache, base); - assert_false(err, ""); - - err = emap_init(&test_data->emap, test_data->base, - /* zeroed */ false); - assert_false(err, ""); - - hpa_central_init(&test_data->central, &test_data->edata_cache, - &test_data->emap); - - *r_central = (hpa_central_t *)test_data; - *r_base = base; -} - -static void -destroy_test_data(hpa_central_t *central) { - test_data_t *test_data = (test_data_t *)central; - base_delete(TSDN_NULL, test_data->base); - free(test_data); -} - -static edata_t * -test_edata(base_t *base, uintptr_t addr, size_t size) { - edata_t *edata = base_alloc_edata(TSDN_NULL, base); - assert_ptr_not_null(edata, ""); - edata_init(edata, base_ind_get(base), (void *)addr, - size, /* slab */ false, /* szind_t */ SC_NSIZES, /* sn */ 0, - extent_state_active, /* zeroed */ true, /* comitted */ true, - EXTENT_PAI_HPA, /* is_head */ true); - return edata; -} - -static void -edata_expect_alloc(base_t *base, edata_t *edata, uintptr_t addr, size_t size) { - expect_ptr_not_null(edata, "Alloc should have succeeded"); - expect_u_eq(base_ind_get(base), edata_arena_ind_get(edata), ""); - expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); - expect_d_eq(extent_state_active, edata_state_get(edata), ""); - assert_ptr_eq((void *)addr, edata_base_get(edata), ""); - assert_zu_eq(size, edata_size_get(edata), ""); -} - - -TEST_BEGIN(test_empty) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata; - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 2 * PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 8 * PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, 8 * PAGE); - expect_ptr_null(edata, "Empty allocator 
succeed in its allocation"); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_first_fit_simple) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); - bool err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); - - edata_t *edata2 = test_edata(base, 4 * PAGE, 1 * PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata2, 4 * PAGE, PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata2); - - /* - * Even though there's a lower-addressed extent that a by-size search - * will find earlier, we should still pick the earlier one. - */ - edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); - /* - * Recall there's still an active page at the beginning of the extent - * added at 10 * PAGE; the next allocation from it should be at 11 * - * PAGE. - */ - edata_expect_alloc(base, edata3, 11 * PAGE, PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_first_fit_large_goal) { - /* - * See the comment in hpa_central_alloc_reuse; we should prefer an - * earlier allocation over a later one, even if it means we fall short - * of the goal size. - */ - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); - bool err = hpa_central_alloc_grow(TSDN_NULL, central, 2 * PAGE, edata1); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); - - /* We need a page, but would like 2. */ - edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - 2 * PAGE); - edata_expect_alloc(base, edata2, 12 * PAGE, 2 * PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata1); - - /* - * Now, we have a 2-page inactive extent, then a 2-page active extent, - * then a 6-page inactive extent. If our minimum size is 2 but the goal - * size is 4, we should still pick the first hole rather than the - * second. - */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 4 * PAGE); - edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); - - /* - * Make sure we didn't succeed only by forgetting about that last range - * or something. - */ - edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, - 4 * PAGE); - edata_expect_alloc(base, edata3, 14 * PAGE, 4 * PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_merging) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - /* Test an exact match */ - bool err; - edata_t *edata1 = test_edata(base, 10 * PAGE, PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); - expect_false(err, "Alloc should have succeeded"); - edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); - - edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - expect_ptr_null(edata2, "Allocation should have failed"); - - /* - * Create two more regions; one immediately before the first and one - * immediately after. The extents shouldn't get merged. 
- */ - edata2 = test_edata(base, 11 * PAGE, PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); - edata_expect_alloc(base, edata2, 11 * PAGE, PAGE); - - edata_t *edata3 = test_edata(base, 12 * PAGE, 20 * PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata3); - edata_expect_alloc(base, edata3, 12 * PAGE, PAGE); - - /* - * OK, we've got 3 contiguous ranges; [10, 11), [11, 12), and [12, 22). - * They shouldn't get merged though, even once freed. We free the - * middle range last to test merging (or rather, the lack thereof) in - * both directions. - */ - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata2); - - /* - * A two-page range should only be satisfied by the third added region. - */ - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, - 2 * PAGE); - edata_expect_alloc(base, edata, 12 * PAGE, 2 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata); - - /* Same with a three-page range. */ - edata = hpa_central_alloc_reuse(TSDN_NULL, central, 3 * PAGE, 3 * PAGE); - edata_expect_alloc(base, edata, 12 * PAGE, 3 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata); - - /* Let's try some cases that *should* get merged. */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); - edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); - edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); - - /* Merge with predecessor. */ - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata2); - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, - 4 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 4 * PAGE); - - /* Merge with successor */ - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata1); - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, - 6 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata1); - - /* - * Let's try merging with both. We need to get three adjacent - * allocations again; do it the same way as before. 
- */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); - edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); - edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata2); - - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, - 6 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_stress_simple) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - enum { - range_base = 1024 * PAGE, - range_pages = 256, - range_size = range_pages * PAGE - }; - - edata_t *edatas[range_pages]; - - bool err; - edata_t *range = test_edata(base, range_base, range_size); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, range); - expect_false(err, "Unexpected grow failure"); - hpa_central_dalloc(TSDN_NULL, central, range); - - for (size_t i = 0; i < range_pages; i++) { - edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - edata_expect_alloc(base, edatas[i], range_base + i * PAGE, - PAGE); - } - /* Free up the odd indices. */ - for (size_t i = 0; i < range_pages; i++) { - if (i % 2 == 0) { - continue; - } - hpa_central_dalloc(TSDN_NULL, central, edatas[i]); - } - /* - * Reallocate them again. Try it with a goal size that can't be - * satisfied. - */ - for (size_t i = 0; i < range_pages; i++) { - if (i % 2 == 0) { - continue; - } - edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - edata_expect_alloc(base, edatas[i], range_base + i * PAGE, - PAGE); - } - /* - * In each batch of 8, create a free range of 4 pages and a free range - * of 2 pages. - */ - for (size_t i = 0; i < range_pages; i += 8) { - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 1]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 2]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 3]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 4]); - - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 6]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 7]); - } - - /* - * And allocate 3 pages into the first, and 2 pages into the second. To - * mix things up a little, lets get those amounts via goal sizes - * instead. - */ - for (size_t i = 0; i < range_pages; i += 8) { - edatas[i + 1] = hpa_central_alloc_reuse(TSDN_NULL, central, - 2 * PAGE, 3 * PAGE); - edata_expect_alloc(base, edatas[i + 1], - range_base + (i + 1) * PAGE, 3 * PAGE); - - edatas[i + 6] = hpa_central_alloc_reuse(TSDN_NULL, central, - 2 * PAGE, 4 * PAGE); - edata_expect_alloc(base, edatas[i + 6], - range_base + (i + 6) * PAGE, 2 * PAGE); - } - - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, - 2 * PAGE); - expect_ptr_null(edata, "Should be no free ranges of 2 pages"); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_stress_random) { - const size_t range_length = 32 * PAGE; - const size_t range_base = 100 * PAGE; - const size_t size_max_pages = 16; - - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - /* - * We loop through this once per some operations, so we don't want it to - * get too big. 
- */ - const size_t nlive_edatas_max = 100; - size_t nlive_edatas = 0; - edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); - size_t nranges = 0; - - /* - * Nothing special about this constant; we're only fixing it for - * consistency across runs. - */ - size_t prng_state = (size_t)0x76999ffb014df07c; - for (size_t i = 0; i < 100 * 1000; i++) { - size_t operation = prng_range_zu(&prng_state, 2); - if (operation == 0) { - /* Do an alloc. */ - if (nlive_edatas == nlive_edatas_max) { - continue; - } - size_t min_pages = 1 + prng_range_zu( - &prng_state, size_max_pages); - size_t goal_pages = min_pages + prng_range_zu( - &prng_state, size_max_pages - min_pages + 1); - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, - central, min_pages * PAGE, goal_pages * PAGE); - if (edata == NULL) { - edata = test_edata(base, - range_base + range_length * nranges, - range_length); - bool err = hpa_central_alloc_grow(TSDN_NULL, - central, goal_pages * PAGE, edata); - assert_false(err, "Unexpected grow failure"); - nranges++; - } - uintptr_t begin = (uintptr_t)edata_base_get(edata); - uintptr_t end = (uintptr_t)edata_last_get(edata); - size_t range_begin = (begin - range_base) / range_length; - size_t range_end = (end - range_base) / range_length; - expect_zu_eq(range_begin, range_end, - "Should not have allocations spanning " - "multiple ranges"); - expect_zu_ge(begin, range_base, - "Gave back a pointer outside of the reserved " - "range"); - expect_zu_lt(end, range_base + range_length * nranges, - "Gave back a pointer outside of the reserved " - "range"); - for (size_t j = 0; j < nlive_edatas; j++) { - edata_t *other = live_edatas[j]; - uintptr_t other_begin = - (uintptr_t)edata_base_get(other); - uintptr_t other_end = - (uintptr_t)edata_last_get(other); - expect_true( - (begin < other_begin && end < other_begin) - || (begin > other_end), - "Gave back two extents that overlap"); - } - live_edatas[nlive_edatas] = edata; - nlive_edatas++; - } else { - /* Do a free. */ - if (nlive_edatas == 0) { - continue; - } - size_t victim = prng_range_zu(&prng_state, - nlive_edatas); - edata_t *to_free = live_edatas[victim]; - live_edatas[victim] = live_edatas[nlive_edatas - 1]; - nlive_edatas--; - hpa_central_dalloc(TSDN_NULL, central, to_free); - } - } - - free(live_edatas); - destroy_test_data(central); -} -TEST_END - -int main(void) { - return test_no_reentrancy( - test_empty, - test_first_fit_simple, - test_first_fit_large_goal, - test_merging, - test_stress_simple, - test_stress_random); -} -- cgit v0.12 From d93eef2f405b7c6e2a78f589a5037a26d4bd4d44 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 May 2021 13:54:26 -0700 Subject: HPA: Introduce a redesigned hpa_central_t. For now, this only handles allocating virtual address space to shards, with no reuse. This is framework, though; it will change over time. 
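
Roughly, the new wiring looks like the sketch below. This is illustrative only: wire_up_shard() is a hypothetical helper, its arguments (emap, base, edata_cache, shard index, opts) are assumed to be set up by the caller, and the include lines are the usual internal preamble used by the other src/*.c files. It is modeled on the create_test_data() changes to test/unit/hpa.c further down in this patch.

/*
 * Sketch only: one hpa_central_t owns eden and the hooks; each shard just
 * keeps a pointer to it.
 */
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/hpa.h"

static bool
wire_up_shard(hpa_central_t *central, hpa_shard_t *shard, emap_t *emap,
    base_t *base, edata_cache_t *edata_cache, unsigned ind,
    const hpa_shard_opts_t *opts) {
	/* Both initializers return true on error, per the usual convention. */
	if (hpa_central_init(central, base, &hpa_hooks_default)) {
		return true;
	}
	if (hpa_shard_init(shard, central, emap, base, edata_cache, ind,
	    opts)) {
		return true;
	}
	return false;
}

On the allocation side, the shard's grow path (hpa_alloc_batch_psset() in src/hpa.c) now carves a fresh hugepage-backed hpdata_t out of central->eden with hpa_central_extract(tsdn, shard->central, size, &oom) instead of the old per-shard hpa_grow(); with this patch that address space is grow-only and is never handed back to the central allocator for reuse.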
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/hpa.h | 70 +++++++--- include/jemalloc/internal/pa.h | 24 +++- include/jemalloc/internal/witness.h | 4 +- src/arena.c | 16 ++- src/hpa.c | 205 +++++++++++++++++++----------- src/jemalloc.c | 16 ++- src/pa.c | 29 +++-- test/unit/hpa.c | 10 +- test/unit/pa.c | 9 +- 10 files changed, 257 insertions(+), 128 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index bb3462f..557e49f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -99,7 +99,7 @@ bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); -void arena_boot(sc_data_t *sc_data); +bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 3132a6f..623f9c4 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -7,6 +7,37 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* + * The mutex guarding most of the operations on the central data + * structure. + */ + malloc_mutex_t mtx; + /* + * Guards expansion of eden. We separate this from the regular mutex so + * that cheaper operations can still continue while we're doing the OS + * call. + */ + malloc_mutex_t grow_mtx; + /* + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. + */ + void *eden; + size_t eden_len; + /* Source for metadata. */ + base_t *base; + /* Number of grow operations done on this hpa_central_t. */ + uint64_t age_counter; + + /* The HPA hooks. */ + hpa_hooks_t hooks; +}; + typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* @@ -52,18 +83,19 @@ struct hpa_shard_s { * pointer to the hpa_shard_t. */ pai_t pai; - malloc_mutex_t grow_mtx; - malloc_mutex_t mtx; - /* The base metadata allocator. */ - base_t *base; + /* The central allocator we get our hugepages from. */ + hpa_central_t *central; + /* Protects most of this shard's state. */ + malloc_mutex_t mtx; /* - * The HPA hooks for this shard. Eventually, once we have the - * hpa_central_t back, these should live there (since it doesn't make - * sense for different shards on the same hpa_central_t to have - * different hooks). + * Guards the shard's access to the central allocator (preventing + * multiple threads operating on this shard from accessing the central + * allocator). */ - hpa_hooks_t hooks; + malloc_mutex_t grow_mtx; + /* The base metadata allocator. */ + base_t *base; /* * This edata cache is the one we use when allocating a small extent @@ -81,18 +113,13 @@ struct hpa_shard_s { */ uint64_t age_counter; - /* - * Either NULL (if empty), or some integer multiple of a - * hugepage-aligned number of hugepages. We carve them off one at a - * time to satisfy new pageslab requests. - * - * Guarded by grow_mtx. - */ - void *eden; - size_t eden_len; - /* The arena ind we're associated with. 
*/ unsigned ind; + + /* + * Our emap. This is just a cache of the emap pointer in the associated + * hpa_central. + */ emap_t *emap; /* The configuration choices for this hpa shard. */ @@ -117,8 +144,9 @@ struct hpa_shard_s { * just that it can function properly given the system it's running on. */ bool hpa_supported(); -bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_hooks_t *hooks, +bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); +bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, + base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 582625b..2e5b9ef 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -20,6 +20,11 @@ * others will be coming soon. */ +typedef struct pa_central_s pa_central_t; +struct pa_central_s { + hpa_central_t hpa; +}; + /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -61,6 +66,9 @@ struct pa_shard_stats_s { */ typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { + /* The central PA this shard is associated with. */ + pa_central_t *central; + /* * Number of pages in active extents. * @@ -76,6 +84,7 @@ struct pa_shard_s { * for those allocations. */ atomic_b_t use_hpa; + /* * If we never used the HPA to begin with, it wasn't initialized, and so * we shouldn't try to e.g. acquire its mutexes during fork. This @@ -121,18 +130,21 @@ pa_shard_ehooks_get(pa_shard_t *shard) { } /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, - unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms); +bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, + hpa_hooks_t *hpa_hooks); + +/* Returns true on error. */ +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, + emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, + malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); /* * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts); + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 0c29321..c12a705 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -52,8 +52,8 @@ enum witness_rank_e { WITNESS_RANK_EXTENTS, WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, - WITNESS_RANK_HPA_GROW, - WITNESS_RANK_HPA, + WITNESS_RANK_HPA_CENTRAL_GROW, + WITNESS_RANK_HPA_CENTRAL, WITNESS_RANK_EDATA_CACHE, diff --git a/src/arena.c b/src/arena.c index 5daeea3..a495ef6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -36,6 +36,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; +pa_central_t arena_pa_central_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1541,9 +1542,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_t cur_time; nstime_init_update(&cur_time); - if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, - &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), - &cur_time, oversize_threshold, arena_dirty_decay_ms_default_get(), + if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global, + &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, + LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, + arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -1575,7 +1577,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, - &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } } @@ -1664,8 +1666,8 @@ arena_is_huge(unsigned arena_ind) { return (arena_ind == huge_arena_ind); } -void -arena_boot(sc_data_t *sc_data) { +bool +arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); for (unsigned i = 0; i < SC_NBINS; i++) { @@ -1680,6 +1682,8 @@ arena_boot(sc_data_t *sc_data) { nbins_total += bin_infos[i].n_shards; cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); } + return pa_central_init(&arena_pa_central_global, base, hpa, + &hpa_hooks_default); } void diff --git a/src/hpa.c b/src/hpa.c index 4ae30b9..1059458 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -51,9 +51,125 @@ hpa_supported() { } bool -hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, - const hpa_hooks_t *hooks, const hpa_shard_opts_t *opts) { +hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { + /* malloc_conf processing should have filtered out these cases. 
*/ + assert(hpa_supported()); + bool err; + err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", + WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(¢ral->mtx, "hpa_central", + WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + central->base = base; + central->eden = NULL; + central->eden_len = 0; + central->age_counter = 0; + central->hooks = *hooks; + return false; +} + +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { + return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t), + CACHELINE); +} + +hpdata_t * +hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + bool *oom) { + /* Don't yet support big allocations; these should get filtered out. */ + assert(size <= HUGEPAGE); + /* + * Should only try to extract from the central allocator if the local + * shard is exhausted. We should hold the grow_mtx on that shard. + */ + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); + + malloc_mutex_lock(tsdn, ¢ral->grow_mtx); + *oom = false; + + hpdata_t *ps = NULL; + + /* Is eden a perfect fit? */ + if (central->eden != NULL && central->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + hpdata_init(ps, central->eden, central->age_counter++); + central->eden = NULL; + central->eden_len = 0; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return ps; + } + + /* + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. + */ + if (central->eden == NULL) { + /* + * During development, we're primarily concerned with systems + * with overcommit. Eventually, we should be more careful here. + */ + bool commit = true; + /* Allocate address space, bailing if we fail. */ + void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, + &commit); + if (new_eden == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + pages_unmap(new_eden, HPA_EDEN_SIZE); + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + central->eden = new_eden; + central->eden_len = HPA_EDEN_SIZE; + } else { + /* Eden is already nonempty; only need an edata for ps. */ + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + } + assert(ps != NULL); + assert(central->eden != NULL); + assert(central->eden_len > HUGEPAGE); + assert(central->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); + + hpdata_init(ps, central->eden, central->age_counter++); + + char *eden_char = (char *)central->eden; + eden_char += HUGEPAGE; + central->eden = (void *)eden_char; + central->eden_len -= HUGEPAGE; + + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + + return ps; +} + +bool +hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, + base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. 
*/ assert(hpa_supported()); bool err; @@ -69,13 +185,11 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, } assert(edata_cache != NULL); + shard->central = central; shard->base = base; - shard->hooks = *hooks; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; - shard->eden = NULL; - shard->eden_len = 0; shard->ind = ind; shard->emap = emap; @@ -136,12 +250,6 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_unlock(tsdn, &shard->grow_mtx); } -static hpdata_t * -hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { - return (hpdata_t *)base_alloc(tsdn, shard->base, sizeof(hpdata_t), - CACHELINE); -} - static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* @@ -227,7 +335,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; - shard->hooks.curtime(&now); + shard->central->hooks.curtime(&now); hpdata_allow_hugify(ps, now); } /* @@ -247,64 +355,6 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, } } -static hpdata_t * -hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - hpdata_t *ps = NULL; - - /* Is eden a perfect fit? */ - if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - return NULL; - } - hpdata_init(ps, shard->eden, shard->age_counter++); - shard->eden = NULL; - shard->eden_len = 0; - return ps; - } - - /* - * We're about to try to allocate from eden by splitting. If eden is - * NULL, we have to allocate it too. Otherwise, we just have to - * allocate an edata_t for the new psset. - */ - if (shard->eden == NULL) { - /* Allocate address space, bailing if we fail. */ - void *new_eden = shard->hooks.map(HPA_EDEN_SIZE); - if (new_eden == NULL) { - return NULL; - } - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - shard->hooks.unmap(new_eden, HPA_EDEN_SIZE); - return NULL; - } - shard->eden = new_eden; - shard->eden_len = HPA_EDEN_SIZE; - } else { - /* Eden is already nonempty; only need an edata for ps. */ - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - return NULL; - } - } - assert(ps != NULL); - assert(shard->eden != NULL); - assert(shard->eden_len > HUGEPAGE); - assert(shard->eden_len % HUGEPAGE == 0); - assert(HUGEPAGE_ADDR2BASE(shard->eden) == shard->eden); - - hpdata_init(ps, shard->eden, shard->age_counter++); - - char *eden_char = (char *)shard->eden; - eden_char += HUGEPAGE; - shard->eden = (void *)eden_char; - shard->eden_len -= HUGEPAGE; - - return ps; -} - /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -348,7 +398,8 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Actually do the purging, now that the lock is dropped. */ if (dehugify) { - shard->hooks.dehugify(hpdata_addr_get(to_purge), HUGEPAGE); + shard->central->hooks.dehugify(hpdata_addr_get(to_purge), + HUGEPAGE); } size_t total_purged = 0; uint64_t purges_this_pass = 0; @@ -359,7 +410,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - shard->hooks.purge(purge_addr, purge_size); + shard->central->hooks.purge(purge_addr, purge_size); } malloc_mutex_lock(tsdn, &shard->mtx); @@ -406,7 +457,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. 
*/ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t millis = nstime_msec(&nstime); if (millis < shard->opts.hugify_delay_ms) { @@ -427,7 +478,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - shard->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); + shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -604,7 +655,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_grow(tsdn, shard); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -833,7 +884,7 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* There should be no allocations anywhere. */ assert(hpdata_empty(ps)); psset_remove(&shard->psset, ps); - shard->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); + shard->central->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 71efcb6..8d57180 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1747,7 +1747,19 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - arena_boot(&sc_data); + if (opt_hpa && !hpa_supported()) { + malloc_printf(": HPA not supported in the current " + "configuration; %s.", + opt_abort_conf ? "aborting" : "disabling"); + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } else { + opt_hpa = false; + } + } + if (arena_boot(&sc_data, b0get(), opt_hpa)) { + return true; + } if (tcache_boot(TSDN_NULL, b0get())) { return true; } @@ -1786,7 +1798,7 @@ malloc_init_hard_a0_locked() { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index 0172dfa..aebb8e9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -15,10 +15,23 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { } bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, - unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms) { +pa_central_init(pa_central_t *central, base_t *base, bool hpa, + hpa_hooks_t *hpa_hooks) { + bool err; + if (hpa) { + err = hpa_central_init(¢ral->hpa, base, hpa_hooks); + if (err) { + return true; + } + } + return false; +} + +bool +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, + emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, + malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -42,6 +55,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->central = central; shard->emap = emap; shard->base = base; @@ -50,10 +64,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts) { - if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, hpa_hooks, hpa_opts)) { + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { + if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, + shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a9e551f..2d4fa9b 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -13,6 +13,7 @@ struct test_data_s { * test_data_t and the hpa_shard_t; */ hpa_shard_t shard; + hpa_central_t central; base_t *base; edata_cache_t shard_edata_cache; @@ -50,9 +51,12 @@ create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->emap, - test_data->base, &test_data->shard_edata_cache, SHARD_IND, - hooks, opts); + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/pa.c b/test/unit/pa.c index dacd8e7..4206e85 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -40,6 +40,7 @@ init_test_extent_hooks(extent_hooks_t *hooks) { typedef struct test_data_s test_data_t; struct test_data_s { pa_shard_t shard; + pa_central_t central; base_t *base; emap_t emap; pa_shard_stats_t stats; @@ -63,9 +64,13 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); + err = pa_central_init(&test_data->central, base, opt_hpa, + &hpa_hooks_default); + assert_false(err, ""); + const size_t oversize_threshold = 8 * 1024 * 1024; - err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, - test_data->base, /* ind */ 1, &test_data->stats, + err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->central, + &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms); assert_false(err, ""); -- cgit v0.12 From 92a1e38f5286bcc8f206c02219cd6b703b39d80d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 23 Jul 2021 15:29:43 -0700 Subject: edata_cache: Allow unbounded fast caching. The edata_cache_small had a fill/flush heuristic. In retrospect, this was a premature optimization; more testing indicates that an unbounded cache is effectively fine here, and moreover we spend a nontrivial amount of time doing unnecessary filling/flushing. As the HPA takes on a larger and larger fraction of all allocations, any theoretical differences in allocation patterns should shrink. 
The HPA is more efficient with its metadata in general, so it still comes out ahead on metadata usage anyways. --- include/jemalloc/internal/edata_cache.h | 24 ++--- include/jemalloc/internal/hpa.h | 2 +- src/edata_cache.c | 39 +++---- src/hpa.c | 12 +-- test/unit/edata_cache.c | 173 +++++++++++++------------------- 5 files changed, 99 insertions(+), 151 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 9a54df0..8b6c0ef 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -3,15 +3,8 @@ #include "jemalloc/internal/base.h" -/* - * Public for tests. When we go to the fallback when the small cache is empty, - * we grab up to 8 items (grabbing less only if the fallback is exhausted). - * When we exceed 16, we flush. This caps the maximum memory lost per cache to - * 16 * sizeof(edata_t), a max of 2k on architectures where the edata_t is 128 - * bytes. - */ -#define EDATA_CACHE_SMALL_MAX 16 -#define EDATA_CACHE_SMALL_FILL 8 +/* For tests only. */ +#define EDATA_CACHE_FAST_FILL 4 /* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to @@ -40,18 +33,17 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); * synchronization and avoids first-fit strategies. */ -typedef struct edata_cache_small_s edata_cache_small_t; -struct edata_cache_small_s { +typedef struct edata_cache_fast_s edata_cache_fast_t; +struct edata_cache_fast_s { edata_list_inactive_t list; - size_t count; edata_cache_t *fallback; bool disabled; }; -void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); -edata_t *edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs); -void edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, +void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback); +edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs); +void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata); -void edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs); +void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 623f9c4..46878a8 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -102,7 +102,7 @@ struct hpa_shard_s { * from a pageslab. The pageslab itself comes from the centralized * allocator, and so will use its edata_cache. 
*/ - edata_cache_small_t ecs; + edata_cache_fast_t ecf; psset_t psset; diff --git a/src/edata_cache.c b/src/edata_cache.c index ecfce41..6bc1848 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -56,39 +56,34 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { } void -edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { +edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) { edata_list_inactive_init(&ecs->list); - ecs->count = 0; ecs->fallback = fallback; ecs->disabled = false; } static void -edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn, - edata_cache_small_t *ecs) { - assert(ecs->count == 0); +edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, + edata_cache_fast_t *ecs) { edata_t *edata; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); - while (ecs->count < EDATA_CACHE_SMALL_FILL) { - edata = edata_avail_first(&ecs->fallback->avail); + for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { + edata = edata_avail_remove_first(&ecs->fallback->avail); if (edata == NULL) { break; } - edata_avail_remove(&ecs->fallback->avail, edata); edata_list_inactive_append(&ecs->list, edata); - ecs->count++; atomic_load_sub_store_zu(&ecs->fallback->count, 1); } malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); } edata_t * -edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { +edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { - assert(ecs->count == 0); assert(edata_list_inactive_first(&ecs->list) == NULL); return edata_cache_get(tsdn, ecs->fallback); } @@ -96,15 +91,13 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { edata_t *edata = edata_list_inactive_first(&ecs->list); if (edata != NULL) { edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; return edata; } /* Slow path; requires synchronization. */ - edata_cache_small_try_fill_from_fallback(tsdn, ecs); + edata_cache_fast_try_fill_from_fallback(tsdn, ecs); edata = edata_list_inactive_first(&ecs->list); if (edata != NULL) { edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; } else { /* * Slowest path (fallback was also empty); allocate something @@ -116,7 +109,7 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { } static void -edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { +edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { /* * You could imagine smarter cache management policies (like * only flushing down to some threshold in anticipation of @@ -132,19 +125,16 @@ edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { edata_avail_insert(&ecs->fallback->avail, edata); nflushed++; } - atomic_load_add_store_zu(&ecs->fallback->count, ecs->count); + atomic_load_add_store_zu(&ecs->fallback->count, nflushed); malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); - assert(nflushed == ecs->count); - ecs->count = 0; } void -edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { +edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { - assert(ecs->count == 0); assert(edata_list_inactive_first(&ecs->list) == NULL); edata_cache_put(tsdn, ecs->fallback, edata); return; @@ -155,15 +145,10 @@ edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { * cache locality. 
*/ edata_list_inactive_prepend(&ecs->list, edata); - ecs->count++; - if (ecs->count > EDATA_CACHE_SMALL_MAX) { - assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1); - edata_cache_small_flush_all(tsdn, ecs); - } } void -edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) { - edata_cache_small_flush_all(tsdn, ecs); +edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs) { + edata_cache_fast_flush_all(tsdn, ecs); ecs->disabled = true; } diff --git a/src/hpa.c b/src/hpa.c index 1059458..6441b4e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -187,7 +187,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, assert(edata_cache != NULL); shard->central = central; shard->base = base; - edata_cache_small_init(&shard->ecs, edata_cache); + edata_cache_fast_init(&shard->ecf, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; shard->ind = ind; @@ -537,7 +537,7 @@ static edata_t * hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { bool err; - edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); + edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { *oom = true; return NULL; @@ -545,7 +545,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); return NULL; } @@ -592,7 +592,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * tweaked the stats, but our tweaks weren't really accurate). */ psset_update_end(&shard->psset, ps); - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); *oom = true; return NULL; } @@ -805,7 +805,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(ps != NULL); void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); @@ -844,7 +844,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); - edata_cache_small_disable(tsdn, &shard->ecs); + edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 9a5d14b..fe920c9 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -47,38 +47,48 @@ TEST_BEGIN(test_edata_cache) { } TEST_END -TEST_BEGIN(test_edata_cache_small_simple) { +static size_t +ecf_count(edata_cache_fast_t *ecf) { + size_t count = 0; + edata_t *cur; + ql_foreach(cur, &ecf->list.head, ql_link_inactive) { + count++; + } + return count; +} + +TEST_BEGIN(test_edata_cache_fast_simple) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - edata_t *ed1 = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *ed1 = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(ed1, ""); - expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed2 = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *ed2 = edata_cache_fast_get(TSDN_NULL, 
&ecf); expect_ptr_not_null(ed2, ""); - expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(TSDN_NULL, &ecs, ed1); - expect_zu_eq(ecs.count, 1, ""); + edata_cache_fast_put(TSDN_NULL, &ecf, ed1); + expect_zu_eq(ecf_count(&ecf), 1, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(TSDN_NULL, &ecs, ed2); - expect_zu_eq(ecs.count, 2, ""); + edata_cache_fast_put(TSDN_NULL, &ecf, ed2); + expect_zu_eq(ecf_count(&ecf), 2, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* LIFO ordering. */ - expect_ptr_eq(ed2, edata_cache_small_get(TSDN_NULL, &ecs), ""); - expect_zu_eq(ecs.count, 1, ""); + expect_ptr_eq(ed2, edata_cache_fast_get(TSDN_NULL, &ecf), ""); + expect_zu_eq(ecf_count(&ecf), 1, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - expect_ptr_eq(ed1, edata_cache_small_get(TSDN_NULL, &ecs), ""); - expect_zu_eq(ecs.count, 0, ""); + expect_ptr_eq(ed1, edata_cache_fast_get(TSDN_NULL, &ecf), ""); + expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); test_edata_cache_destroy(&ec); @@ -87,41 +97,41 @@ TEST_END TEST_BEGIN(test_edata_cache_fill) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - edata_t *allocs[EDATA_CACHE_SMALL_FILL * 2]; + edata_t *allocs[EDATA_CACHE_FAST_FILL * 2]; /* * If the fallback cache can't satisfy the request, we shouldn't do * extra allocations until compelled to. Put half the fill goal in the * fallback. */ - for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL / 2; i++) { allocs[i] = edata_cache_get(TSDN_NULL, &ec); } - for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL / 2; i++) { edata_cache_put(TSDN_NULL, &ec, allocs[i]); } - expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2, + expect_zu_eq(EDATA_CACHE_FAST_FILL / 2, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2 - 1, ecs.count, + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL / 2 - 1, ecf_count(&ecf), "Should have grabbed all edatas available but no more."); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL / 2; i++) { - allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + for (int i = 1; i < EDATA_CACHE_FAST_FILL / 2; i++) { + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, ecf_count(&ecf), ""); /* When forced, we should alloc from the base. */ - edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(edata, ""); - expect_zu_eq(0, ecs.count, "Allocated more than necessary"); + expect_zu_eq(0, ecf_count(&ecf), "Allocated more than necessary"); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Allocated more than necessary"); @@ -129,116 +139,78 @@ TEST_BEGIN(test_edata_cache_fill) { * We should correctly fill in the common case where the fallback isn't * exhausted, too. 
*/ - for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL * 2; i++) { allocs[i] = edata_cache_get(TSDN_NULL, &ec); expect_ptr_not_null(allocs[i], ""); } - for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL * 2; i++) { edata_cache_put(TSDN_NULL, &ec, allocs[i]); } - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { - expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + for (int i = 1; i < EDATA_CACHE_FAST_FILL; i++) { + expect_zu_eq(EDATA_CACHE_FAST_FILL - i, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { - expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + for (int i = 1; i < EDATA_CACHE_FAST_FILL; i++) { + expect_zu_eq(EDATA_CACHE_FAST_FILL - i, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); test_edata_cache_destroy(&ec); } TEST_END -TEST_BEGIN(test_edata_cache_flush) { - edata_cache_t ec; - edata_cache_small_t ecs; - - test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); - - edata_t *allocs[2 * EDATA_CACHE_SMALL_MAX + 2]; - for (int i = 0; i < 2 * EDATA_CACHE_SMALL_MAX + 2; i++) { - allocs[i] = edata_cache_get(TSDN_NULL, &ec); - expect_ptr_not_null(allocs[i], ""); - } - for (int i = 0; i < EDATA_CACHE_SMALL_MAX; i++) { - edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); - expect_zu_eq(i + 1, ecs.count, ""); - expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - } - edata_cache_small_put(TSDN_NULL, &ecs, allocs[EDATA_CACHE_SMALL_MAX]); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - - for (int i = EDATA_CACHE_SMALL_MAX + 1; - i < 2 * EDATA_CACHE_SMALL_MAX + 1; i++) { - edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); - expect_zu_eq(i - EDATA_CACHE_SMALL_MAX, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - } - edata_cache_small_put(TSDN_NULL, &ecs, allocs[2 * EDATA_CACHE_SMALL_MAX + 1]); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(2 * EDATA_CACHE_SMALL_MAX + 2, - 
atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - - test_edata_cache_destroy(&ec); -} -TEST_END - TEST_BEGIN(test_edata_cache_disable) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - for (int i = 0; i < EDATA_CACHE_SMALL_FILL; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { edata_t *edata = edata_cache_get(TSDN_NULL, &ec); expect_ptr_not_null(edata, ""); - edata_cache_small_put(TSDN_NULL, &ecs, edata); + edata_cache_fast_put(TSDN_NULL, &ecf, edata); } - expect_zu_eq(EDATA_CACHE_SMALL_FILL, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - edata_cache_small_disable(TSDN_NULL, &ecs); + edata_cache_fast_disable(TSDN_NULL, &ecf); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); - edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, + edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, atomic_load_zu(&ec.count, ATOMIC_RELAXED), - "Disabled ecs should forward on get"); + "Disabled ecf should forward on get"); - edata_cache_small_put(TSDN_NULL, &ecs, edata); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + edata_cache_fast_put(TSDN_NULL, &ecf, edata); + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), - "Disabled ecs should forward on put"); + "Disabled ecf should forward on put"); test_edata_cache_destroy(&ec); } @@ -248,8 +220,7 @@ int main(void) { return test( test_edata_cache, - test_edata_cache_small_simple, + test_edata_cache_fast_simple, test_edata_cache_fill, - test_edata_cache_flush, test_edata_cache_disable); } -- cgit v0.12 From 08a4cc0969edf054c8483efd35981eb8b66eb0c1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 19 Jul 2021 16:47:10 -0700 Subject: Pairing heap: inline functions instead of macros. By force-inlining everything that would otherwise be a macro, we get the same effect (it's not clear in the first place that this is actually a good idea, but it avoids making any changes to the existing performance profile). This makes the code more maintainable (in anticipation of subsequent changes), as well as making performance profiles and debug info more readable (we get "real" line numbers, instead of making everything point to the macro definition of all associated functions). --- include/jemalloc/internal/edata.h | 13 +- include/jemalloc/internal/hpdata.h | 6 +- include/jemalloc/internal/ph.h | 746 ++++++++++++++++++++----------------- src/edata.c | 4 +- src/hpdata.c | 2 +- test/unit/ph.c | 60 +-- 6 files changed, 452 insertions(+), 379 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 55d1dfe..3a04a9a 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -81,8 +81,8 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ typedef struct edata_s edata_t; -typedef ph(edata_t) edata_avail_t; -typedef ph(edata_t) edata_heap_t; +ph_structs(edata_avail, edata_t); +ph_structs(edata_heap, edata_t); struct edata_s { /* * Bitfield containing several fields: @@ -214,7 +214,10 @@ struct edata_s { * slabs_nonfull, or when the edata_t is unassociated with an * extent and sitting in an edata_cache. */ - phn(edata_t) ph_link; + union { + edata_heap_link_t heap_link; + edata_avail_link_t avail_link; + }; }; union { @@ -664,7 +667,7 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } -ph_proto(, edata_avail_, edata_avail_t, edata_t) -ph_proto(, edata_heap_, edata_heap_t, edata_t) +ph_proto(, edata_avail, edata_t) +ph_proto(, edata_heap, edata_t) #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 2a12add..c2ed692 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -18,6 +18,7 @@ * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ typedef struct hpdata_s hpdata_t; +ph_structs(hpdata_age_heap, hpdata_t); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing @@ -82,7 +83,7 @@ struct hpdata_s { union { /* When nonempty (and also nonfull), used by the psset bins. */ - phn(hpdata_t) ph_link; + hpdata_age_heap_link_t age_link; /* * When empty (or not corresponding to any hugepage), list * linkage. @@ -120,8 +121,7 @@ TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge) TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify) -typedef ph(hpdata_t) hpdata_age_heap_t; -ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +ph_proto(, hpdata_age_heap, hpdata_t); static inline void * hpdata_addr_get(const hpdata_t *hpdata) { diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 63aeac9..beb50d5 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -15,377 +15,435 @@ ******************************************************************************* */ +typedef int (*ph_cmp_t)(void *, void *); + /* Node structure. */ -#define phn(a_type) \ -struct { \ - a_type *phn_prev; \ - a_type *phn_next; \ - a_type *phn_lchild; \ +typedef struct phn_link_s phn_link_t; +struct phn_link_s { + void *prev; + void *next; + void *lchild; +}; + +typedef struct ph_s ph_t; +struct ph_s { + void *root; +}; + +JEMALLOC_ALWAYS_INLINE phn_link_t * +phn_link_get(void *phn, size_t offset) { + return (phn_link_t *)(((uintptr_t)phn) + offset); } -/* Root structure. */ -#define ph(a_type) \ -struct { \ - a_type *ph_root; \ +JEMALLOC_ALWAYS_INLINE void +phn_link_init(void *phn, size_t offset) { + phn_link_get(phn, offset)->prev = NULL; + phn_link_get(phn, offset)->next = NULL; + phn_link_get(phn, offset)->lchild = NULL; } -/* Internal utility macros. 
*/ -#define phn_lchild_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_lchild) -#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ - a_phn->a_field.phn_lchild = a_lchild; \ -} while (0) - -#define phn_next_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_next) -#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ - a_phn->a_field.phn_prev = a_prev; \ -} while (0) - -#define phn_prev_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_prev) -#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ - a_phn->a_field.phn_next = a_next; \ -} while (0) - -#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ - a_type *phn0child; \ - \ - assert(a_phn0 != NULL); \ - assert(a_phn1 != NULL); \ - assert(a_cmp(a_phn0, a_phn1) <= 0); \ - \ - phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ - phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ - phn_next_set(a_type, a_field, a_phn1, phn0child); \ - if (phn0child != NULL) { \ - phn_prev_set(a_type, a_field, phn0child, a_phn1); \ - } \ - phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ -} while (0) - -#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ - if (a_phn0 == NULL) { \ - r_phn = a_phn1; \ - } else if (a_phn1 == NULL) { \ - r_phn = a_phn0; \ - } else if (a_cmp(a_phn0, a_phn1) < 0) { \ - phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ - a_cmp); \ - r_phn = a_phn0; \ - } else { \ - phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ - a_cmp); \ - r_phn = a_phn1; \ - } \ -} while (0) - -#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *head = NULL; \ - a_type *tail = NULL; \ - a_type *phn0 = a_phn; \ - a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ +/* Internal utility helpers. */ +JEMALLOC_ALWAYS_INLINE void * +phn_lchild_get(void *phn, size_t offset) { + return phn_link_get(phn, offset)->lchild; +} + +JEMALLOC_ALWAYS_INLINE void +phn_lchild_set(void *phn, void *lchild, size_t offset) { + phn_link_get(phn, offset)->lchild = lchild; +} + +JEMALLOC_ALWAYS_INLINE void * +phn_next_get(void *phn, size_t offset) { + return phn_link_get(phn, offset)->next; +} + +JEMALLOC_ALWAYS_INLINE void +phn_next_set(void *phn, void *next, size_t offset) { + phn_link_get(phn, offset)->next = next; +} + +JEMALLOC_ALWAYS_INLINE void * +phn_prev_get(void *phn, size_t offset) { + return phn_link_get(phn, offset)->prev; +} + +JEMALLOC_ALWAYS_INLINE void +phn_prev_set(void *phn, void *prev, size_t offset) { + phn_link_get(phn, offset)->prev = prev; +} + +JEMALLOC_ALWAYS_INLINE void +phn_merge_ordered(void *phn0, void *phn1, size_t offset, + ph_cmp_t cmp) { + void *phn0child; + + assert(phn0 != NULL); + assert(phn1 != NULL); + assert(cmp(phn0, phn1) <= 0); + + phn_prev_set(phn1, phn0, offset); + phn0child = phn_lchild_get(phn0, offset); + phn_next_set(phn1, phn0child, offset); + if (phn0child != NULL) { + phn_prev_set(phn0child, phn1, offset); + } + phn_lchild_set(phn0, phn1, offset); +} + +JEMALLOC_ALWAYS_INLINE void * +phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { + void *result; + if (phn0 == NULL) { + result = phn1; + } else if (phn1 == NULL) { + result = phn0; + } else if (cmp(phn0, phn1) < 0) { + phn_merge_ordered(phn0, phn1, offset, cmp); + result = phn0; + } else { + phn_merge_ordered(phn1, phn0, offset, cmp); + result = phn1; + } + return result; +} + +JEMALLOC_ALWAYS_INLINE void * +phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { + void *head = NULL; + void *tail = NULL; + void *phn0 = phn; + void *phn1 = 
phn_next_get(phn0, offset); + + /* + * Multipass merge, wherein the first two elements of a FIFO + * are repeatedly merged, and each result is appended to the + * singly linked FIFO, until the FIFO contains only a single + * element. We start with a sibling list but no reference to + * its tail, so we do a single pass over the sibling list to + * populate the FIFO. + */ + if (phn1 != NULL) { + void *phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + head = tail = phn0; + phn0 = phnrest; + while (phn0 != NULL) { + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = phnrest; + } else { + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = NULL; + } + } + phn0 = head; + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + while (true) { + head = phn_next_get(phn1, offset); + assert(phn_prev_get(phn0, offset) == NULL); + phn_next_set(phn0, NULL, offset); + assert(phn_prev_get(phn1, offset) == NULL); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + if (head == NULL) { + break; + } + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = head; + phn1 = phn_next_get(phn0, offset); + } + } + } + return phn0; +} + +JEMALLOC_ALWAYS_INLINE void +ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { + void *phn = phn_next_get(ph->root, offset); + if (phn != NULL) { + phn_prev_set(ph->root, NULL, offset); + phn_next_set(ph->root, NULL, offset); + phn_prev_set(phn, NULL, offset); + phn = phn_merge_siblings(phn, offset, cmp); + assert(phn_next_get(phn, offset) == NULL); + ph->root = phn_merge(ph->root, phn, offset, cmp); + } +} + +JEMALLOC_ALWAYS_INLINE void * +ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) { + void *result; + void *lchild = phn_lchild_get(phn, offset); + if (lchild == NULL) { + result = NULL; + } else { + result = phn_merge_siblings(lchild, offset, cmp); + } + return result; +} + +JEMALLOC_ALWAYS_INLINE void +ph_new(ph_t *ph) { + ph->root = NULL; +} + +JEMALLOC_ALWAYS_INLINE bool +ph_empty(ph_t *ph) { + return ph->root == NULL; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { + if (ph->root == NULL) { + return NULL; + } + ph_merge_aux(ph, offset, cmp); + return ph->root; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_any(ph_t *ph, size_t offset) { + if (ph->root == NULL) { + return NULL; + } + void *aux = phn_next_get(ph->root, offset); + if (aux != NULL) { + return aux; + } + return ph->root; +} + +JEMALLOC_ALWAYS_INLINE void +ph_insert(ph_t *ph, void *phn, size_t offset) { + phn_link_init(phn, offset); + + /* + * Treat the root as an aux list during insertion, and lazily merge + * during a_prefix##remove_first(). For elements that are inserted, + * then removed via a_prefix##remove() before the aux list is ever + * processed, this makes insert/remove constant-time, whereas eager + * merging would make insert O(log n). 
+ */ + if (ph->root == NULL) { + ph->root = phn; + } else { + phn_next_set(phn, phn_next_get(ph->root, offset), offset); + if (phn_next_get(ph->root, offset) != NULL) { + phn_prev_set(phn_next_get(ph->root, offset), phn, + offset); + } + phn_prev_set(phn, ph->root, offset); + phn_next_set(ph->root, phn, offset); + } +} + +JEMALLOC_ALWAYS_INLINE void * +ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { + void *ret; + + if (ph->root == NULL) { + return NULL; + } + ph_merge_aux(ph, offset, cmp); + ret = ph->root; + ph->root = ph_merge_children(ph->root, offset, cmp); + + return ret; + +} + +JEMALLOC_ALWAYS_INLINE void * +ph_remove_any(ph_t *ph, size_t offset, ph_cmp_t cmp) { + /* + * Remove the most recently inserted aux list element, or the root if + * the aux list is empty. This has the effect of behaving as a LIFO + * (and insertion/removal is therefore constant-time) if + * a_prefix##[remove_]first() are never called. + */ + if (ph->root == NULL) { + return NULL; + } + void *ret = phn_next_get(ph->root, offset); + if (ret != NULL) { + void *aux = phn_next_get(ret, offset); + phn_next_set(ph->root, aux, offset); + if (aux != NULL) { + phn_prev_set(aux, ph->root, offset); + } + return ret; + } + ret = ph->root; + ph->root = ph_merge_children(ph->root, offset, cmp); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { + void *replace; + void *parent; + + if (ph->root == phn) { + /* + * We can delete from aux list without merging it, but we need + * to merge if we are dealing with the root node and it has + * children. + */ + if (phn_lchild_get(phn, offset) == NULL) { + ph->root = phn_next_get(phn, offset); + if (ph->root != NULL) { + phn_prev_set(ph->root, NULL, offset); + } + return; + } + ph_merge_aux(ph, offset, cmp); + if (ph->root == phn) { + ph->root = ph_merge_children(ph->root, offset, cmp); + return; + } + } + + /* Get parent (if phn is leftmost child) before mutating. */ + if ((parent = phn_prev_get(phn, offset)) != NULL) { + if (phn_lchild_get(parent, offset) != phn) { + parent = NULL; + } + } + /* Find a possible replacement node, and link to parent. */ + replace = ph_merge_children(phn, offset, cmp); + /* Set next/prev for sibling linked list. */ + if (replace != NULL) { + if (parent != NULL) { + phn_prev_set(replace, parent, offset); + phn_lchild_set(parent, replace, offset); + } else { + phn_prev_set(replace, phn_prev_get(phn, offset), + offset); + if (phn_prev_get(phn, offset) != NULL) { + phn_next_set(phn_prev_get(phn, offset), replace, + offset); + } + } + phn_next_set(replace, phn_next_get(phn, offset), offset); + if (phn_next_get(phn, offset) != NULL) { + phn_prev_set(phn_next_get(phn, offset), replace, + offset); + } + } else { + if (parent != NULL) { + void *next = phn_next_get(phn, offset); + phn_lchild_set(parent, next, offset); + if (next != NULL) { + phn_prev_set(next, parent, offset); + } + } else { + assert(phn_prev_get(phn, offset) != NULL); + phn_next_set( + phn_prev_get(phn, offset), + phn_next_get(phn, offset), offset); + } + if (phn_next_get(phn, offset) != NULL) { + phn_prev_set( + phn_next_get(phn, offset), + phn_prev_get(phn, offset), offset); + } + } +} + +#define ph_structs(a_prefix, a_type) \ +typedef struct { \ + phn_link_t link; \ +} a_prefix##_link_t; \ \ - /* \ - * Multipass merge, wherein the first two elements of a FIFO \ - * are repeatedly merged, and each result is appended to the \ - * singly linked FIFO, until the FIFO contains only a single \ - * element. 
We start with a sibling list but no reference to \ - * its tail, so we do a single pass over the sibling list to \ - * populate the FIFO. \ - */ \ - if (phn1 != NULL) { \ - a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ - if (phnrest != NULL) { \ - phn_prev_set(a_type, a_field, phnrest, NULL); \ - } \ - phn_prev_set(a_type, a_field, phn0, NULL); \ - phn_next_set(a_type, a_field, phn0, NULL); \ - phn_prev_set(a_type, a_field, phn1, NULL); \ - phn_next_set(a_type, a_field, phn1, NULL); \ - phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ - head = tail = phn0; \ - phn0 = phnrest; \ - while (phn0 != NULL) { \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - phnrest = phn_next_get(a_type, a_field, \ - phn1); \ - if (phnrest != NULL) { \ - phn_prev_set(a_type, a_field, \ - phnrest, NULL); \ - } \ - phn_prev_set(a_type, a_field, phn0, \ - NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - phn_prev_set(a_type, a_field, phn1, \ - NULL); \ - phn_next_set(a_type, a_field, phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = phnrest; \ - } else { \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = NULL; \ - } \ - } \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - while (true) { \ - head = phn_next_get(a_type, a_field, \ - phn1); \ - assert(phn_prev_get(a_type, a_field, \ - phn0) == NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - assert(phn_prev_get(a_type, a_field, \ - phn1) == NULL); \ - phn_next_set(a_type, a_field, phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - if (head == NULL) { \ - break; \ - } \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, \ - phn0); \ - } \ - } \ - } \ - r_phn = phn0; \ -} while (0) - -#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ - a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ - if (phn != NULL) { \ - phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_prev_set(a_type, a_field, phn, NULL); \ - ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ - assert(phn_next_get(a_type, a_field, phn) == NULL); \ - phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ - a_ph->ph_root); \ - } \ -} while (0) - -#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ - if (lchild == NULL) { \ - r_phn = NULL; \ - } else { \ - ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ - r_phn); \ - } \ -} while (0) +typedef struct { \ + ph_t ph; \ +} a_prefix##_t; /* * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). 
*/ -#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ -a_attr void a_prefix##new(a_ph_type *ph); \ -a_attr bool a_prefix##empty(a_ph_type *ph); \ -a_attr a_type *a_prefix##first(a_ph_type *ph); \ -a_attr a_type *a_prefix##any(a_ph_type *ph); \ -a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ -a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ -a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \ -a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); +#define ph_proto(a_attr, a_prefix, a_type) \ + \ +a_attr void a_prefix##_new(a_prefix##_t *ph); \ +a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ +a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); -/* - * The ph_gen() macro generates a type-specific pairing heap implementation, - * based on the above cpp macros. - */ -#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +/* The ph_gen() macro generates a type-specific pairing heap implementation. */ +#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ +JEMALLOC_ALWAYS_INLINE int \ +a_prefix##_ph_cmp(void *a, void *b) { \ + return a_cmp((a_type *)a, (a_type *)b); \ +} \ + \ a_attr void \ -a_prefix##new(a_ph_type *ph) { \ - memset(ph, 0, sizeof(ph(a_type))); \ +a_prefix##_new(a_prefix##_t *ph) { \ + ph_new(&ph->ph); \ } \ + \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) { \ - return (ph->ph_root == NULL); \ +a_prefix##_empty(a_prefix##_t *ph) { \ + return ph_empty(&ph->ph); \ } \ + \ a_attr a_type * \ -a_prefix##first(a_ph_type *ph) { \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - return ph->ph_root; \ +a_prefix##_first(a_prefix##_t *ph) { \ + return ph_first(&ph->ph, offsetof(a_type, a_field), \ + &a_prefix##_ph_cmp); \ } \ + \ a_attr a_type * \ -a_prefix##any(a_ph_type *ph) { \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \ - if (aux != NULL) { \ - return aux; \ - } \ - return ph->ph_root; \ +a_prefix##_any(a_prefix##_t *ph) { \ + return ph_any(&ph->ph, offsetof(a_type, a_field)); \ } \ -a_attr void \ -a_prefix##insert(a_ph_type *ph, a_type *phn) { \ - memset(&phn->a_field, 0, sizeof(phn(a_type))); \ \ - /* \ - * Treat the root as an aux list during insertion, and lazily \ - * merge during a_prefix##remove_first(). For elements that \ - * are inserted, then removed via a_prefix##remove() before the \ - * aux list is ever processed, this makes insert/remove \ - * constant-time, whereas eager merging would make insert \ - * O(log n). 
\ - */ \ - if (ph->ph_root == NULL) { \ - ph->ph_root = phn; \ - } else { \ - phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ - a_field, ph->ph_root)); \ - if (phn_next_get(a_type, a_field, ph->ph_root) != \ - NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, ph->ph_root), \ - phn); \ - } \ - phn_prev_set(a_type, a_field, phn, ph->ph_root); \ - phn_next_set(a_type, a_field, ph->ph_root, phn); \ - } \ +a_attr void \ +a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field)); \ } \ -a_attr a_type * \ -a_prefix##remove_first(a_ph_type *ph) { \ - a_type *ret; \ \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - \ - ret = ph->ph_root; \ - \ - ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ - ph->ph_root); \ - \ - return ret; \ -} \ a_attr a_type * \ -a_prefix##remove_any(a_ph_type *ph) { \ - /* \ - * Remove the most recently inserted aux list element, or the \ - * root if the aux list is empty. This has the effect of \ - * behaving as a LIFO (and insertion/removal is therefore \ - * constant-time) if a_prefix##[remove_]first() are never \ - * called. \ - */ \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \ - if (ret != NULL) { \ - a_type *aux = phn_next_get(a_type, a_field, ret); \ - phn_next_set(a_type, a_field, ph->ph_root, aux); \ - if (aux != NULL) { \ - phn_prev_set(a_type, a_field, aux, \ - ph->ph_root); \ - } \ - return ret; \ - } \ - ret = ph->ph_root; \ - ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ - ph->ph_root); \ - return ret; \ +a_prefix##_remove_first(a_prefix##_t *ph) { \ + return ph_remove_first(&ph->ph, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } \ -a_attr void \ -a_prefix##remove(a_ph_type *ph, a_type *phn) { \ - a_type *replace, *parent; \ \ - if (ph->ph_root == phn) { \ - /* \ - * We can delete from aux list without merging it, but \ - * we need to merge if we are dealing with the root \ - * node and it has children. \ - */ \ - if (phn_lchild_get(a_type, a_field, phn) == NULL) { \ - ph->ph_root = phn_next_get(a_type, a_field, \ - phn); \ - if (ph->ph_root != NULL) { \ - phn_prev_set(a_type, a_field, \ - ph->ph_root, NULL); \ - } \ - return; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - if (ph->ph_root == phn) { \ - ph_merge_children(a_type, a_field, ph->ph_root, \ - a_cmp, ph->ph_root); \ - return; \ - } \ - } \ +a_attr a_type * \ +a_prefix##_remove_any(a_prefix##_t *ph) { \ + return ph_remove_any(&ph->ph, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ +} \ \ - /* Get parent (if phn is leftmost child) before mutating. */ \ - if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ - if (phn_lchild_get(a_type, a_field, parent) != phn) { \ - parent = NULL; \ - } \ - } \ - /* Find a possible replacement node, and link to parent. */ \ - ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ - /* Set next/prev for sibling linked list. 
*/ \ - if (replace != NULL) { \ - if (parent != NULL) { \ - phn_prev_set(a_type, a_field, replace, parent); \ - phn_lchild_set(a_type, a_field, parent, \ - replace); \ - } else { \ - phn_prev_set(a_type, a_field, replace, \ - phn_prev_get(a_type, a_field, phn)); \ - if (phn_prev_get(a_type, a_field, phn) != \ - NULL) { \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - replace); \ - } \ - } \ - phn_next_set(a_type, a_field, replace, \ - phn_next_get(a_type, a_field, phn)); \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - replace); \ - } \ - } else { \ - if (parent != NULL) { \ - a_type *next = phn_next_get(a_type, a_field, \ - phn); \ - phn_lchild_set(a_type, a_field, parent, next); \ - if (next != NULL) { \ - phn_prev_set(a_type, a_field, next, \ - parent); \ - } \ - } else { \ - assert(phn_prev_get(a_type, a_field, phn) != \ - NULL); \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - phn_next_get(a_type, a_field, phn)); \ - } \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - phn_prev_get(a_type, a_field, phn)); \ - } \ - } \ +a_attr void \ +a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ + ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/src/edata.c b/src/edata.c index 23523dd..82b6f56 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, +ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) -ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) +ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) diff --git a/src/hpdata.c b/src/hpdata.c index 18519be..e7d7294 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -15,7 +15,7 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { return (a_age > b_age) - (a_age < b_age); } -ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) +ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { diff --git a/test/unit/ph.c b/test/unit/ph.c index 0f7c991..28f5e48 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -3,11 +3,12 @@ #include "jemalloc/internal/ph.h" typedef struct node_s node_t; +ph_structs(heap, node_t); struct node_s { #define NODE_MAGIC 0x9823af7e uint32_t magic; - phn(node_t) link; + heap_link_t link; uint64_t key; }; @@ -36,8 +37,22 @@ node_cmp_magic(const node_t *a, const node_t *b) { return node_cmp(a, b); } -typedef ph(node_t) heap_t; -ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); +ph_gen(static, heap, node_t, link, node_cmp_magic); + +static node_t * +node_next_get(const node_t *node) { + return phn_next_get((node_t *)node, offsetof(node_t, link)); +} + +static node_t * +node_prev_get(const node_t *node) { + return phn_prev_get((node_t *)node, offsetof(node_t, link)); +} + +static node_t * +node_lchild_get(const node_t *node) { + return phn_lchild_get((node_t *)node, offsetof(node_t, link)); +} static void node_print(const node_t *node, unsigned depth) { @@ -49,14 +64,14 @@ node_print(const node_t *node, unsigned depth) { } malloc_printf("%2"FMTu64"\n", node->key); - leftmost_child = phn_lchild_get(node_t, link, node); + 
leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { return; } node_print(leftmost_child, depth + 1); - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != + NULL; sibling = node_next_get(sibling)) { node_print(sibling, depth + 1); } } @@ -66,16 +81,15 @@ heap_print(const heap_t *heap) { node_t *auxelm; malloc_printf("vvv heap %p vvv\n", heap); - if (heap->ph_root == NULL) { + if (heap->ph.root == NULL) { goto label_return; } - node_print(heap->ph_root, 0); + node_print(heap->ph.root, 0); - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, + for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; + auxelm = node_next_get(auxelm)) { + expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); } @@ -94,18 +108,17 @@ node_validate(const node_t *node, const node_t *parent) { "Child is less than parent"); } - leftmost_child = phn_lchild_get(node_t, link, node); + leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { return nnodes; } - expect_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + expect_ptr_eq(node_prev_get(leftmost_child), (void *)node, "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { - expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, sibling)), sibling, + for (sibling = node_next_get(leftmost_child); sibling != + NULL; sibling = node_next_get(sibling)) { + expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); } @@ -117,16 +130,15 @@ heap_validate(const heap_t *heap) { unsigned nnodes = 0; node_t *auxelm; - if (heap->ph_root == NULL) { + if (heap->ph.root == NULL) { goto label_return; } - nnodes += node_validate(heap->ph_root, NULL); + nnodes += node_validate(heap->ph.root, NULL); - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, + for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; + auxelm = node_next_get(auxelm)) { + expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); } -- cgit v0.12 From 0170dd198ae0ef92ae923b454c02259802b78b76 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 08:46:19 -0700 Subject: Edata: Fix a couple typos. Some readability-enhancing whitespace, and a spelling error. --- include/jemalloc/internal/edata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 3a04a9a..da0774f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -64,7 +64,7 @@ typedef struct e_prof_info_s e_prof_info_t; /* * The information about a particular edata that lives in an emap. 
Space is - * more previous there (the information, plus the edata pointer, has to live in + * more precious there (the information, plus the edata pointer, has to live in * a 64-bit word if we want to enable a packed representation. * * There are two things that are special about the information here: @@ -196,6 +196,7 @@ struct edata_s { * into pageslabs). This tracks it. */ hpdata_t *e_ps; + /* * Serial number. These are not necessarily unique; splitting an extent * results in two extents with the same serial number. -- cgit v0.12 From dc0a4b8b2f2daf17a27b4b1fc869ef48d40d3ef2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 09:02:17 -0700 Subject: Edata: Pull out comparison fields into a summary. For now, this is a no-op; eventually, it will allow some caching in the eset. --- include/jemalloc/internal/edata.h | 44 ++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index da0774f..ff14982 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -79,6 +79,12 @@ struct edata_map_info_s { szind_t szind; }; +typedef struct edata_cmp_summary_s edata_cmp_summary_t; +struct edata_cmp_summary_s { + uint64_t sn; + uintptr_t addr; +}; + /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; ph_structs(edata_avail, edata_t); @@ -611,14 +617,6 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { } static inline int -edata_sn_comp(const edata_t *a, const edata_t *b) { - uint64_t a_sn = edata_sn_get(a); - uint64_t b_sn = edata_sn_get(b); - - return (a_sn > b_sn) - (a_sn < b_sn); -} - -static inline int edata_esn_comp(const edata_t *a, const edata_t *b) { size_t a_esn = edata_esn_get(a); size_t b_esn = edata_esn_get(b); @@ -627,14 +625,6 @@ edata_esn_comp(const edata_t *a, const edata_t *b) { } static inline int -edata_ad_comp(const edata_t *a, const edata_t *b) { - uintptr_t a_addr = (uintptr_t)edata_addr_get(a); - uintptr_t b_addr = (uintptr_t)edata_addr_get(b); - - return (a_addr > b_addr) - (a_addr < b_addr); -} - -static inline int edata_ead_comp(const edata_t *a, const edata_t *b) { uintptr_t a_eaddr = (uintptr_t)a; uintptr_t b_eaddr = (uintptr_t)b; @@ -642,20 +632,32 @@ edata_ead_comp(const edata_t *a, const edata_t *b) { return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); } +static inline edata_cmp_summary_t +edata_cmp_summary_get(const edata_t *edata) { + return (edata_cmp_summary_t){edata_sn_get(edata), + (uintptr_t)edata_addr_get(edata)}; +} + static inline int -edata_snad_comp(const edata_t *a, const edata_t *b) { +edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { int ret; - - ret = edata_sn_comp(a, b); + ret = (a.sn > b.sn) - (a.sn < b.sn); if (ret != 0) { return ret; } - - ret = edata_ad_comp(a, b); + ret = (a.addr > b.addr) - (a.addr < b.addr); return ret; } static inline int +edata_snad_comp(const edata_t *a, const edata_t *b) { + edata_cmp_summary_t a_cmp = edata_cmp_summary_get(a); + edata_cmp_summary_t b_cmp = edata_cmp_summary_get(b); + + return edata_cmp_summary_comp(a_cmp, b_cmp); +} + +static inline int edata_esnead_comp(const edata_t *a, const edata_t *b) { int ret; -- cgit v0.12 From 252e0942d0346f1cc700874b55d0c1fef95c40e7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 09:26:09 -0700 Subject: Eset: Pull per-pszind data into structs. We currently have one for stats and one for the data. 
The data struct is just a wrapper around the edata_heap_t, but this will change shortly. --- include/jemalloc/internal/eset.h | 22 +++++++++++---- src/eset.c | 61 +++++++++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index ff5e57d..708ef99 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -14,16 +14,28 @@ * there are mutating operations. One exception is the stats counters, which * may be read without any locking. */ + +typedef struct eset_bin_s eset_bin_t; +struct eset_bin_s { + edata_heap_t heap; +}; + +typedef struct eset_bin_stats_s eset_bin_stats_t; +struct eset_bin_stats_s { + atomic_zu_t nextents; + atomic_zu_t nbytes; +}; + typedef struct eset_s eset_t; struct eset_s { - /* Quantized per size class heaps of extents. */ - edata_heap_t heaps[SC_NPSIZES + 1]; - atomic_zu_t nextents[SC_NPSIZES + 1]; - atomic_zu_t nbytes[SC_NPSIZES + 1]; - /* Bitmap for which set bits correspond to non-empty heaps. */ fb_group_t bitmap[FB_NGROUPS(SC_NPSIZES + 1)]; + /* Quantized per size class heaps of extents. */ + eset_bin_t bins[SC_NPSIZES + 1]; + + eset_bin_stats_t bin_stats[SC_NPSIZES + 1]; + /* LRU of all extents in heaps. */ edata_list_inactive_t lru; diff --git a/src/eset.c b/src/eset.c index 9183ac6..01af422 100644 --- a/src/eset.c +++ b/src/eset.c @@ -5,14 +5,25 @@ #define ESET_NPSIZES (SC_NPSIZES + 1) +static void +eset_bin_init(eset_bin_t *bin) { + edata_heap_new(&bin->heap); +} + +static void +eset_bin_stats_init(eset_bin_stats_t *bin_stats) { + atomic_store_zu(&bin_stats->nextents, 0, ATOMIC_RELAXED); + atomic_store_zu(&bin_stats->nbytes, 0, ATOMIC_RELAXED); +} + void eset_init(eset_t *eset, extent_state_t state) { for (unsigned i = 0; i < ESET_NPSIZES; i++) { - edata_heap_new(&eset->heaps[i]); + eset_bin_init(&eset->bins[i]); + eset_bin_stats_init(&eset->bin_stats[i]); } fb_init(eset->bitmap, ESET_NPSIZES); edata_list_inactive_init(&eset->lru); - atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; } @@ -23,28 +34,34 @@ eset_npages_get(eset_t *eset) { size_t eset_nextents_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + return atomic_load_zu(&eset->bin_stats[pind].nextents, ATOMIC_RELAXED); } size_t eset_nbytes_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + return atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); } static void eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); + size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, + ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1, + ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nbytes, cur + sz, + ATOMIC_RELAXED); } static void eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); + size_t cur = 
atomic_load_zu(&eset->bin_stats[pind].nextents, + ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1, + ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz, + ATOMIC_RELAXED); } void @@ -54,10 +71,10 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (edata_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->bins[pind].heap)) { fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); } - edata_heap_insert(&eset->heaps[pind], edata); + edata_heap_insert(&eset->bins[pind].heap, edata); if (config_stats) { eset_stats_add(eset, pind, size); @@ -84,13 +101,13 @@ eset_remove(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - edata_heap_remove(&eset->heaps[pind], edata); + edata_heap_remove(&eset->bins[pind].heap, edata); if (config_stats) { eset_stats_sub(eset, pind, size); } - if (edata_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->bins[pind].heap)) { fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_list_inactive_remove(&eset->lru, edata); @@ -125,8 +142,8 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, i < pind_max; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < SC_NPSIZES); - assert(!edata_heap_empty(&eset->heaps[i])); - edata_t *edata = edata_heap_first(&eset->heaps[i]); + assert(!edata_heap_empty(&eset->bins[i].heap)); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); uintptr_t base = (uintptr_t)edata_base_get(edata); size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); @@ -165,16 +182,16 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - return edata_heap_empty(&eset->heaps[pind]) ? NULL : - edata_heap_first(&eset->heaps[pind]); + return edata_heap_empty(&eset->bins[pind].heap) ? NULL : + edata_heap_first(&eset->bins[pind].heap); } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); i < ESET_NPSIZES; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { - assert(!edata_heap_empty(&eset->heaps[i])); - edata_t *edata = edata_heap_first(&eset->heaps[i]); + assert(!edata_heap_empty(&eset->bins[i].heap)); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); assert(edata_size_get(edata) >= size); if (lg_max_fit == SC_PTR_BITS) { /* -- cgit v0.12 From dcb7b83facf4f7641cefc0fc7c11c3d88310dae0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 10:20:44 -0700 Subject: Eset: Cache summary information for heap edatas. This lets us do a single array scan to find first fits, instead of taking a cache miss per examined size class. --- include/jemalloc/internal/eset.h | 8 ++++++ src/eset.c | 60 ++++++++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 708ef99..4f689b4 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -18,6 +18,14 @@ typedef struct eset_bin_s eset_bin_t; struct eset_bin_s { edata_heap_t heap; + /* + * We do first-fit across multiple size classes. 
If we compared against + * the min element in each heap directly, we'd take a cache miss per + * extent we looked at. If we co-locate the edata summaries, we only + * take a miss on the edata we're actually going to return (which is + * inevitable anyways). + */ + edata_cmp_summary_t heap_min; }; typedef struct eset_bin_stats_s eset_bin_stats_t; diff --git a/src/eset.c b/src/eset.c index 01af422..6f8f335 100644 --- a/src/eset.c +++ b/src/eset.c @@ -8,6 +8,10 @@ static void eset_bin_init(eset_bin_t *bin) { edata_heap_new(&bin->heap); + /* + * heap_min doesn't need initialization; it gets filled in when the bin + * goes from non-empty to empty. + */ } static void @@ -71,8 +75,21 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); + + edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); if (edata_heap_empty(&eset->bins[pind].heap)) { fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); + /* Only element is automatically the min element. */ + eset->bins[pind].heap_min = edata_cmp_summary; + } else { + /* + * There's already a min element; update the summary if we're + * about to insert a lower one. + */ + if (edata_cmp_summary_comp(edata_cmp_summary, + eset->bins[pind].heap_min) < 0) { + eset->bins[pind].heap_min = edata_cmp_summary; + } } edata_heap_insert(&eset->bins[pind].heap, edata); @@ -101,14 +118,29 @@ eset_remove(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - edata_heap_remove(&eset->bins[pind].heap, edata); - if (config_stats) { eset_stats_sub(eset, pind, size); } + edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); + edata_heap_remove(&eset->bins[pind].heap, edata); if (edata_heap_empty(&eset->bins[pind].heap)) { fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); + } else { + /* + * This is a little weird; we compare if the summaries are + * equal, rather than if the edata we removed was the heap + * minimum. The reason why is that getting the heap minimum + * can cause a pairing heap merge operation. We can avoid this + * if we only update the min if it's changed, in which case the + * summaries of the removed element and the min element should + * compare equal. + */ + if (edata_cmp_summary_comp(edata_cmp_summary, + eset->bins[pind].heap_min) == 0) { + eset->bins[pind].heap_min = edata_cmp_summary_get( + edata_heap_first(&eset->bins[pind].heap)); + } } edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; @@ -116,10 +148,6 @@ eset_remove(eset_t *eset, edata_t *edata) { * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. */ - /* - * This class is not thread-safe in general; we rely on external - * synchronization for all mutating operations. 
- */ size_t cur_extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); @@ -178,6 +206,7 @@ static edata_t * eset_first_fit(eset_t *eset, size_t size, bool exact_only, unsigned lg_max_fit) { edata_t *ret = NULL; + edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0}); pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -191,8 +220,6 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, i < ESET_NPSIZES; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->bins[i].heap)); - edata_t *edata = edata_heap_first(&eset->bins[i].heap); - assert(edata_size_get(edata) >= size); if (lg_max_fit == SC_PTR_BITS) { /* * We'll shift by this below, and shifting out all the @@ -204,8 +231,23 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } - if (ret == NULL || edata_snad_comp(edata, ret) < 0) { + if (ret == NULL || edata_cmp_summary_comp( + eset->bins[i].heap_min, ret_summ) < 0) { + /* + * We grab the edata as early as possible, even though + * we might change it later. Practically, a large + * portion of eset_fit calls succeed at the first valid + * index, so this doesn't cost much, and we get the + * effect of prefetching the edata as early as possible. + */ + edata_t *edata = edata_heap_first(&eset->bins[i].heap); + assert(edata_size_get(edata) >= size); + assert(ret == NULL || edata_snad_comp(edata, ret) < 0); + assert(ret == NULL || edata_cmp_summary_comp( + eset->bins[i].heap_min, + edata_cmp_summary_get(edata)) == 0); ret = edata; + ret_summ = eset->bins[i].heap_min; } if (i == SC_NPSIZES) { break; -- cgit v0.12 From 40d53e007c054f37a5666b2550304adc65c74c78 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 26 Jul 2021 11:52:42 -0700 Subject: ph: Add aux-list counting and pre-merging. --- include/jemalloc/internal/ph.h | 127 +++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index beb50d5..3f7d759 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -13,6 +13,40 @@ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf * ******************************************************************************* + * + * We include a non-obvious optimization: + * - First, we introduce a new pop-and-link operation; pop the two most + * recently-inserted items off the aux-list, link them, and push the resulting + * heap. + * - We maintain a count of the number of insertions since the last time we + * merged the aux-list (i.e. via first() or remove_first()). After N inserts, + * we do ffs(N) pop-and-link operations. + * + * One way to think of this is that we're progressively building up a tree in + * the aux-list, rather than a linked-list (think of the series of merges that + * will be performed as the aux-count grows). + * + * There's a couple reasons we benefit from this: + * - Ordinarily, after N insertions, the aux-list is of size N. With our + * strategy, it's of size O(log(N)). So we decrease the worst-case time of + * first() calls, and reduce the average cost of remove_min calls. Since + * these almost always occur while holding a lock, we practically reduce the + * frequency of unusually long hold times. + * - This moves the bulk of the work of merging the aux-list onto the threads + * that are inserting into the heap. 
In some common scenarios, insertions + * happen in bulk, from a single thread (think tcache flushing; we potentially + * move many slabs from slabs_full to slabs_nonfull). All the nodes in this + * case are in the inserting threads cache, and linking them is very cheap + * (cache misses dominate linking cost). Without this optimization, linking + * happens on the next call to remove_first. Since that remove_first call + * likely happens on a different thread (or at least, after the cache has + * gotten cold if done on the same thread), deferring linking trades cheap + * link operations now for expensive ones later. + * + * The ffs trick keeps amortized insert cost at constant time. Similar + * strategies based on periodically sorting the list after a batch of operations + * perform worse than this in practice, even with various fancy tricks; they + * all took amortized complexity of an insert from O(1) to O(log(n)). */ typedef int (*ph_cmp_t)(void *, void *); @@ -28,6 +62,13 @@ struct phn_link_s { typedef struct ph_s ph_t; struct ph_s { void *root; + /* + * Inserts done since the last aux-list merge. This is not necessarily + * the size of the aux-list, since it's possible that removals have + * happened since, and we don't track whether or not those removals are + * from the aux list. + */ + size_t auxcount; }; JEMALLOC_ALWAYS_INLINE phn_link_t * @@ -181,6 +222,7 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { + ph->auxcount = 0; void *phn = phn_next_get(ph->root, offset); if (phn != NULL) { phn_prev_set(ph->root, NULL, offset); @@ -207,6 +249,7 @@ ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_new(ph_t *ph) { ph->root = NULL; + ph->auxcount = 0; } JEMALLOC_ALWAYS_INLINE bool @@ -235,8 +278,35 @@ ph_any(ph_t *ph, size_t offset) { return ph->root; } +/* Returns true if we should stop trying to merge. 
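The pop-and-link schedule described above is easiest to see numerically. A standalone sketch, using POSIX ffs() as a stand-in for the internal ffs_zu():

    #include <stdio.h>
    #include <strings.h> /* ffs() */

    int
    main(void) {
        /* ffs(N) pop-and-link operations after the Nth aux-list insert. */
        for (unsigned n = 1; n <= 16; n++) {
            printf("after %2u inserts: ffs(%u) = %d pop-and-link op(s)\n",
                n, n, ffs((int)n));
        }
        return 0;
    }

The counts follow the binary-carry pattern 1, 2, 1, 3, 1, 2, 1, 4, ..., so most inserts do a single cheap merge while the occasional deeper round keeps the aux list roughly logarithmic and the amortized insert cost constant.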
*/ +JEMALLOC_ALWAYS_INLINE bool +ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) { + assert(ph->root != NULL); + void *phn0 = phn_next_get(ph->root, offset); + if (phn0 == NULL) { + return true; + } + void *phn1 = phn_next_get(phn0, offset); + if (phn1 == NULL) { + return true; + } + void *next_phn1 = phn_next_get(phn1, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + phn_next_set(phn0, next_phn1, offset); + if (next_phn1 != NULL) { + phn_prev_set(next_phn1, phn0, offset); + } + phn_next_set(ph->root, phn0, offset); + phn_prev_set(phn0, ph->root, offset); + return next_phn1 == NULL; +} + JEMALLOC_ALWAYS_INLINE void -ph_insert(ph_t *ph, void *phn, size_t offset) { +ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { phn_link_init(phn, offset); /* @@ -249,6 +319,7 @@ ph_insert(ph_t *ph, void *phn, size_t offset) { if (ph->root == NULL) { ph->root = phn; } else { + ph->auxcount++; phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { phn_prev_set(phn_next_get(ph->root, offset), phn, @@ -257,6 +328,13 @@ ph_insert(ph_t *ph, void *phn, size_t offset) { phn_prev_set(phn, ph->root, offset); phn_next_set(ph->root, phn, offset); } + if (ph->auxcount > 1) { + unsigned nmerges = ffs_zu(ph->auxcount - 1); + bool done = false; + for (unsigned i = 0; i < nmerges && !done; i++) { + done = ph_try_aux_merge_pair(ph, offset, cmp); + } + } } JEMALLOC_ALWAYS_INLINE void * @@ -274,31 +352,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { } -JEMALLOC_ALWAYS_INLINE void * -ph_remove_any(ph_t *ph, size_t offset, ph_cmp_t cmp) { - /* - * Remove the most recently inserted aux list element, or the root if - * the aux list is empty. This has the effect of behaving as a LIFO - * (and insertion/removal is therefore constant-time) if - * a_prefix##[remove_]first() are never called. - */ - if (ph->root == NULL) { - return NULL; - } - void *ret = phn_next_get(ph->root, offset); - if (ret != NULL) { - void *aux = phn_next_get(ret, offset); - phn_next_set(ph->root, aux, offset); - if (aux != NULL) { - phn_prev_set(aux, ph->root, offset); - } - return ret; - } - ret = ph->root; - ph->root = ph_merge_children(ph->root, offset, cmp); - return ret; -} - JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { void *replace; @@ -392,8 +445,8 @@ a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); +a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
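A hypothetical consumer of these generator macros looks much like the unit-test harness shown earlier; widget_t, widget_cmp, and the widget_heap prefix below are illustrative names only:

    typedef struct widget_s widget_t;
    ph_structs(widget_heap, widget_t);

    struct widget_s {
        uint64_t key;
        widget_heap_link_t link;    /* Generated by ph_structs(). */
    };

    static int
    widget_cmp(const widget_t *a, const widget_t *b) {
        return (a->key > b->key) - (a->key < b->key);
    }

    /* Emits widget_heap_new/insert/first/remove_first/remove/... wrappers. */
    ph_gen(static, widget_heap, widget_t, link, widget_cmp)

    static widget_t *
    widget_heap_pop(widget_heap_t *heap) {
        return widget_heap_remove_first(heap);
    }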
*/ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -425,7 +478,8 @@ a_prefix##_any(a_prefix##_t *ph) { \ \ a_attr void \ a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ - ph_insert(&ph->ph, phn, offsetof(a_type, a_field)); \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } \ \ a_attr a_type * \ @@ -434,16 +488,19 @@ a_prefix##_remove_first(a_prefix##_t *ph) { \ a_prefix##_ph_cmp); \ } \ \ -a_attr a_type * \ -a_prefix##_remove_any(a_prefix##_t *ph) { \ - return ph_remove_any(&ph->ph, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ a_attr void \ a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ a_prefix##_ph_cmp); \ +} \ + \ +a_attr a_type * \ +a_prefix##_remove_any(a_prefix##_t *ph) { \ + a_type *ret = a_prefix##_any(ph); \ + if (ret != NULL) { \ + a_prefix##_remove(ph, ret); \ + } \ + return ret; \ } #endif /* JEMALLOC_INTERNAL_PH_H */ -- cgit v0.12 From dae24589bc4e4bcb2a19844e3c5753b8c50d714a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 26 Jul 2021 13:51:38 -0700 Subject: PH: Insert-below-min fast-path. --- include/jemalloc/internal/ph.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 3f7d759..5f091c5 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -319,6 +319,20 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { if (ph->root == NULL) { ph->root = phn; } else { + /* + * As a special case, check to see if we can replace the root. + * This is practically common in some important cases, and lets + * us defer some insertions (hopefully, until the point where + * some of the items in the aux list have been removed, savings + * us from linking them at all). + */ + if (cmp(phn, ph->root) < 0) { + phn_lchild_set(phn, ph->root, offset); + phn_prev_set(ph->root, phn, offset); + ph->root = phn; + ph->auxcount = 0; + return; + } ph->auxcount++; phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { -- cgit v0.12 From 6f41ba55ee85ce505d61713650f49f8bbb5bee6b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Aug 2021 12:53:39 -0700 Subject: Mutex: Make spin count configurable. Don't document it since we don't want to support this as a "real" setting, but it's handy for testing. --- include/jemalloc/internal/mutex.h | 10 +++------- src/ctl.c | 3 +++ src/jemalloc.c | 3 +++ src/mutex.c | 10 ++++++++-- src/stats.c | 1 + 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index f5b1163..63a0b1b 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,6 +6,8 @@ #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" +extern int64_t opt_mutex_max_spin; + typedef enum { /* Can only acquire one mutex of a given witness rank at a time. */ malloc_mutex_rank_exclusive, @@ -43,7 +45,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - /* + /* * Hint flag to avoid exclusive cache line contention * during spin waiting */ @@ -67,12 +69,6 @@ struct malloc_mutex_s { #endif }; -/* - * Based on benchmark results, a fixed spin with this amount of retries works - * well for our critical sections. 
- */ -#define MALLOC_MUTEX_MAX_SPIN 250 - #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) diff --git a/src/ctl.c b/src/ctl.c index b3e62df..3ed0007 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,7 @@ CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_mutex_max_spin) CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) @@ -421,6 +422,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, + {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("background_thread_hpa_interval_max_ms"), @@ -2138,6 +2140,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) +CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8d57180..d5e886e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1256,6 +1256,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + CONF_HANDLE_INT64_T(opt_mutex_max_spin, + "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : diff --git a/src/mutex.c b/src/mutex.c index 83d9ce7..79b8f27 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -9,6 +9,12 @@ #define _CRT_SPINCOUNT 4000 #endif +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. + */ +int64_t opt_mutex_max_spin = 250; + /******************************************************************************/ /* Data. */ @@ -51,7 +57,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { goto label_spin_done; } - int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; + int cnt = 0; do { spin_cpu_spinwait(); if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) @@ -59,7 +65,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { data->n_spin_acquired++; return; } - } while (cnt++ < max_cnt); + } while (cnt++ < opt_mutex_max_spin || opt_mutex_max_spin == -1); if (!config_stats) { /* Only spin is useful when stats is off. 
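Because the setting goes through the normal malloc_conf plumbing, it can still be tuned for experiments even though it stays undocumented. A sketch, assuming the conventional unprefixed API and an arbitrary value of 400:

    #include <jemalloc/jemalloc.h>

    /* Seen by jemalloc at bootstrap; MALLOC_CONF in the environment works too. */
    const char *malloc_conf = "mutex_max_spin:400";

    int64_t
    effective_mutex_max_spin(void) {
        int64_t max_spin = 0;
        size_t sz = sizeof(max_spin);
        /* Readable through the new opt.mutex_max_spin node. */
        mallctl("opt.mutex_max_spin", &max_spin, &sz, NULL, 0);
        return max_spin;
    }

Per the loop condition above, a value of -1 keeps the slow path spinning instead of ever falling back to blocking on the OS primitive.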
*/ diff --git a/src/stats.c b/src/stats.c index 16aa3fd..3a2806e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1496,6 +1496,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") -- cgit v0.12 From 27f71242b74ea402db45c1e6b3b79708b78762d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 5 Aug 2021 10:27:25 -0700 Subject: Mutex: Tweak internal spin count. The recent pairing heap optimizations flattened the lock hold time profile. This was a win for raw cycle counts, but ended up causing us to "just miss" acquiring the mutex before sleeping more often. Bump those counts. --- src/mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mutex.c b/src/mutex.c index 79b8f27..0b3547a 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -13,7 +13,7 @@ * Based on benchmark results, a fixed spin with this amount of retries works * well for our critical sections. */ -int64_t opt_mutex_max_spin = 250; +int64_t opt_mutex_max_spin = 600; /******************************************************************************/ /* Data. */ -- cgit v0.12 From f58064b9321b30bdf9b31715acbe523e4a964adf Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 5 Aug 2021 14:28:32 -0700 Subject: Verify that HPA is used before calling its functions This change eliminates the possibility of PA calling functions of uninitialized HPA. --- src/hpa.c | 24 ++++++++++++++++++++++++ src/pa.c | 10 +++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 6441b4e..6b7517d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -50,6 +50,11 @@ hpa_supported() { return true; } +static void +hpa_do_consistency_checks(hpa_shard_t *shard) { + assert(shard->base != NULL); +} + bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { /* malloc_conf processing should have filtered out these cases. 
*/ @@ -214,6 +219,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc_batch = &hpa_dalloc_batch; + hpa_do_consistency_checks(shard); + return false; } @@ -242,6 +249,8 @@ hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); @@ -843,6 +852,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -868,6 +879,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); /* * By the time we're here, the arena code should have dalloc'd all the * active extents, which means we should have eventually evicted @@ -891,6 +903,8 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); bool deferral_previously_allowed = shard->opts.deferral_allowed; shard->opts.deferral_allowed = deferral_allowed; @@ -903,6 +917,8 @@ hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -910,22 +926,30 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_prefork(tsdn, &shard->grow_mtx); } void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_prefork(tsdn, &shard->mtx); } void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->mtx); } void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } diff --git a/src/pa.c b/src/pa.c index aebb8e9..93da02e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -226,11 +226,15 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) { - hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, - deferral_allowed); + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, + deferral_allowed); + } } void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); + } } -- cgit v0.12 From 6a0160071241bce956978550a60208a37bc971c1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Aug 2021 17:15:56 -0700 Subject: 
Add Cirrus CI testing matrix Contains 16 testing configs -- a mix of debug, prof, -m32 and a few uncommon options. --- .cirrus.yml | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 30fe830..4cca64b 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -3,6 +3,30 @@ env: ARCH: amd64 task: + matrix: + env: + DEBUG_CONFIG: --enable-debug + env: + DEBUG_CONFIG: --disable-debug + matrix: + - env: + PROF_CONFIG: --enable-prof + - env: + PROF_CONFIG: --disable-prof + matrix: + - name: 64-bit + env: + CC: + CXX: + - name: 32-bit + env: + CC: cc -m32 + CXX: c++ -m32 + matrix: + - env: + UNCOMMON_CONFIG: + - env: + UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: image: freebsd-12-2-release-amd64 @@ -12,11 +36,10 @@ task: - pkg install -y autoconf gmake script: - autoconf - #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS # We don't perfectly track freebsd stdlib.h definitions. This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ + - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests -- cgit v0.12 From 5884a076fb858320e7bcf86b961dd1555a81a75e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 12 Aug 2021 15:48:02 -0700 Subject: Rename prof.dump_prefix to prof.prefix This better aligns with our naming convention. The option has not been included in any upstream release yet. --- doc/jemalloc.xml.in | 22 +++++++----------- include/jemalloc/internal/prof_sys.h | 2 +- src/ctl.c | 8 +++---- src/prof_sys.c | 43 ++++++++++++++++++------------------ test/unit/prof_idump.c | 6 ++--- 5 files changed, 37 insertions(+), 44 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fa53715..b8b96ab 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1410,8 +1410,7 @@ malloc_conf = "xmalloc:true";]]> primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is jeprof. This prefix value can be overriden by - prof.dump_prefix. + prof.prefix. @@ -1492,8 +1491,7 @@ malloc_conf = "xmalloc:true";]]> where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. By default, interval-triggered profile dumping is disabled (encoded as -1). @@ -1527,8 +1525,7 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. Note that atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit(), so @@ -2398,16 +2395,14 @@ struct extent_hooks_s { is specified, to a file according to the pattern <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the - opt.prof_prefix and - prof.dump_prefix + opt.prof_prefix + and prof.prefix options. - + - prof.dump_prefix + prof.prefix (const char *) -w [] @@ -2433,8 +2428,7 @@ struct extent_hooks_s { <prefix>.<pid>.<seq>.u<useq>.heap, where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. 
diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index d784ef9..6e4e811 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -10,7 +10,7 @@ void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); +bool prof_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); diff --git a/src/ctl.c b/src/ctl.c index 3ed0007..253341a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -187,7 +187,7 @@ CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_gdump) -CTL_PROTO(prof_dump_prefix) +CTL_PROTO(prof_prefix) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) @@ -578,7 +578,7 @@ static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("gdump"), CTL(prof_gdump)}, - {NAME("dump_prefix"), CTL(prof_dump_prefix)}, + {NAME("prefix"), CTL(prof_prefix)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, @@ -3227,7 +3227,7 @@ label_return: } static int -prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *prefix = NULL; @@ -3240,7 +3240,7 @@ prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, WRITEONLY(); WRITE(prefix, const char *); - ret = prof_dump_prefix_set(tsd_tsdn(tsd), prefix) ? EFAULT : 0; + ret = prof_prefix_set(tsd_tsdn(tsd), prefix) ? EFAULT : 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return ret; diff --git a/src/prof_sys.c b/src/prof_sys.c index 87cd2b2..6a5b2b1 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -34,7 +34,7 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -static char *prof_dump_prefix = NULL; +static char *prof_prefix = NULL; /* The fallback allocator profiling functionality will use. */ base_t *prof_base; @@ -524,16 +524,16 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } static const char * -prof_dump_prefix_get(tsdn_t* tsdn) { +prof_prefix_get(tsdn_t* tsdn) { malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); - return prof_dump_prefix == NULL ? opt_prof_prefix : prof_dump_prefix; + return prof_prefix == NULL ? 
opt_prof_prefix : prof_prefix; } static bool -prof_dump_prefix_is_empty(tsdn_t *tsdn) { +prof_prefix_is_empty(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + bool ret = (prof_prefix_get(tsdn)[0] == '\0'); malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return ret; } @@ -545,18 +545,18 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + const char *prof_prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", - prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prof_prefix, + prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", - prof_prefix, prof_getpid(), prof_dump_seq, v); + "%s.%d.%"FMTu64".%c.heap", prof_prefix, + prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } @@ -565,8 +565,7 @@ void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), - ind); + "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } @@ -574,7 +573,7 @@ void prof_fdump_impl(tsd_t *tsd) { char filename[DUMP_FILENAME_BUFSIZE]; - assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); + assert(!prof_prefix_is_empty(tsd_tsdn(tsd))); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); @@ -582,11 +581,11 @@ prof_fdump_impl(tsd_t *tsd) { } bool -prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { +prof_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix == NULL) { + if (prof_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. */ char *buffer = base_alloc(tsdn, prof_base, @@ -595,12 +594,12 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { return true; } malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_prefix = buffer; + prof_prefix = buffer; } - assert(prof_dump_prefix != NULL); + assert(prof_prefix != NULL); - prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); - prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + prof_strncpy(prof_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return false; @@ -609,7 +608,7 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { void prof_idump_impl(tsd_t *tsd) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return; } @@ -626,7 +625,7 @@ prof_mdump_impl(tsd_t *tsd, const char *filename) { if (filename == NULL) { /* No filename specified, so automatically generate one. 
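Caller-side, the renamed control is consumed exactly as the updated unit test below does. A minimal sketch, with an illustrative prefix value and error handling omitted:

    #include <jemalloc/jemalloc.h>

    void
    set_prof_prefix_and_dump(void) {
        const char *prefix = "/tmp/myapp.prof";
        /* Write-only: overrides opt.prof_prefix for subsequent dumps. */
        mallctl("prof.prefix", NULL, NULL, (void *)&prefix, sizeof(prefix));
        /*
         * No filename supplied, so jemalloc generates
         * <prefix>.<pid>.<seq>.m<mseq>.heap as described above.
         */
        mallctl("prof.dump", NULL, NULL, NULL, 0);
    }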
*/ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } @@ -642,7 +641,7 @@ void prof_gdump_impl(tsd_t *tsd) { tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsdn)[0] == '\0') { + if (prof_prefix_get(tsdn)[0] == '\0') { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return; } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 607944c..e9f5e56 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -26,14 +26,14 @@ TEST_BEGIN(test_idump) { bool active; void *p; - const char *dump_prefix = TEST_PREFIX; + const char *prefix = TEST_PREFIX; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.dump_prefix", NULL, NULL, - (void *)&dump_prefix, sizeof(dump_prefix)), 0, + expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&prefix, + sizeof(prefix)), 0, "Unexpected mallctl failure while overwriting dump prefix"); expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, -- cgit v0.12 From 9d02bdc8838d03b043de5017eaaa837f21dbc4c0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 12 Aug 2021 19:21:56 -0700 Subject: Port gen_run_tests.py to python3 Insignificant changes to make the script runnable on python3. --- scripts/gen_run_tests.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 77c2ce5..7c3075f 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys from itertools import combinations @@ -21,7 +21,7 @@ else: def powerset(items): result = [] - for i in xrange(len(items) + 1): + for i in range(len(items) + 1): result += combinations(items, i) return result @@ -53,19 +53,20 @@ possible_malloc_conf_opts = [ 'background_thread:true', ] -print 'set -e' -print 'if [ -f Makefile ] ; then %(make_cmd)s relclean ; fi' % {'make_cmd': make_cmd} -print 'autoconf' -print 'rm -rf run_tests.out' -print 'mkdir run_tests.out' -print 'cd run_tests.out' +print('set -e') +print('if [ -f Makefile ] ; then %(make_cmd)s relclean ; fi' % {'make_cmd': + make_cmd}) +print('autoconf') +print('rm -rf run_tests.out') +print('mkdir run_tests.out') +print('cd run_tests.out') ind = 0 for cc, cxx in possible_compilers: for compiler_opts in powerset(possible_compiler_opts): for config_opts in powerset(possible_config_opts): for malloc_conf_opts in powerset(possible_malloc_conf_opts): - if cc is 'clang' \ + if cc == 'clang' \ and '-m32' in possible_compiler_opts \ and '--enable-prof' in config_opts: continue @@ -80,9 +81,9 @@ for cc, cxx in possible_compilers: ) # We don't want to test large vaddr spaces in 32-bit mode. - if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in - config_opts): - continue + if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in + config_opts): + continue # Per CPU arenas are only supported on Linux. 
linux_supported = ('percpu_arena:percpu' in malloc_conf_opts \ @@ -93,7 +94,7 @@ for cc, cxx in possible_compilers: if (uname == 'Linux' and linux_supported) \ or (not linux_supported and (uname != 'Darwin' or \ not darwin_unsupported)): - print """cat < run_test_%(ind)d.sh + print("""cat < run_test_%(ind)d.sh #!/bin/sh set -e

run_cmd %(make_cmd)s all tests run_cmd %(make_cmd)s check run_cmd %(make_cmd)s distclean EOF -chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line, 'make_cmd': make_cmd} +chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line, + 'make_cmd': make_cmd}) ind += 1 -print 'for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel} +print('for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs' + ' -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel}) -- cgit v0.12 From 2c625d5cd97e9cb133072feab2edb6b8c78861ef Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 12 Aug 2021 19:02:12 -0700 Subject: Fix warnings when compiled with clang

When clang sees an unknown warning option, unlike gcc it doesn't fail the build with an error; it issues a warning. Hence JE_CFLAGS_ADD with warning options that didn't exist in clang would still mark those options as available. This led to several warnings when built with clang or "gcc" on OSX.

This change fixes those warnings by simply making clang fail builds with non-existent warning options. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 5eb4d46..3e18f4a 100644 --- a/configure.ac +++ b/configure.ac @@ -244,6 +244,7 @@ if test "x$GCC" = "xyes" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) fi fi + JE_CFLAGS_ADD([-Werror=unknown-warning-option]) JE_CFLAGS_ADD([-Wall]) JE_CFLAGS_ADD([-Wextra]) JE_CFLAGS_ADD([-Wshorten-64-to-32]) -- cgit v0.12 From c01a885e94b6edb8545113d3ba43248b4b75e90c Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 19 Aug 2021 15:16:11 -0700 Subject: HPA: Correctly calculate retained pages

Retained pages are those which haven't been touched and are unbacked from the OS perspective. For a pageslab their number should equal "total pages in slab" minus "touched pages". --- include/jemalloc/internal/hpdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index c2ed692..1fb534d 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -292,7 +292,7 @@ hpdata_ndirty_get(hpdata_t *hpdata) { static inline size_t hpdata_nretained_get(hpdata_t *hpdata) { - return hpdata->h_nactive - hpdata->h_ntouched; + return HUGEPAGE_PAGES - hpdata->h_ntouched; } static inline void -- cgit v0.12 From 8b24cb8fdf2bf210e243c1d676484a4ffa5c3f6c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 23 Aug 2021 15:55:54 -0700 Subject: Don't assume initialized arena in the default alloc hook.

Specifically, this change allows the default alloc hook to be used during arenas.create. One use case is to invoke the default alloc hook from a customized hook arena, i.e., the default hooks can be read out of a default arena, and customized ones can then be created based on these hooks. Note that mixing the default with customized hooks is not recommended, and should only be considered when the customization is simple and straightforward.
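For illustration, a minimal sketch of that use case, mirroring the arena_reset test added below (error handling omitted; the function name and the trivial copy-wrapper are hypothetical, not part of this change):

    #include <jemalloc/jemalloc.h>

    static unsigned
    create_arena_with_copied_default_hooks(void) {
        /* Read the default extent hooks out of an initialized arena. */
        extent_hooks_t *default_hooks;
        size_t sz = sizeof(extent_hooks_t *);
        mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, NULL, 0);

        /*
         * Copy them into a "customized" hook table.  It must outlive the new
         * arena, hence static here.  With this change, the default alloc hook
         * tolerates being invoked for the not-yet-initialized arena during
         * arenas.create.
         */
        static extent_hooks_t custom_hooks;
        custom_hooks = *default_hooks;
        extent_hooks_t *hooks = &custom_hooks;

        /* Create a new arena that uses the copied hooks. */
        unsigned arena_ind;
        sz = sizeof(unsigned);
        mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&hooks,
            sizeof(extent_hooks_t *));
        return arena_ind;
    }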
--- src/ehooks.c | 25 ++++++++----------------- test/unit/arena_reset.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 535066e..5d12d00 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -52,9 +52,12 @@ void * ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { arena_t *arena = arena_get(tsdn, arena_ind, false); - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, - ATOMIC_RELAXED)); + /* NULL arena indicates arena_create. */ + assert(arena != NULL || alignment == HUGEPAGE); + dss_prec_t dss = (arena == NULL) ? dss_prec_disabled : + (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, + zero, commit, dss); if (have_madvise_huge && ret) { pages_set_thp_state(ret, size); } @@ -64,20 +67,8 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, static void * ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = arena_get(tsdn, arena_ind, false); - /* - * The arena we're allocating on behalf of must have been initialized - * already. - */ - assert(arena != NULL); - - return ehooks_default_alloc_impl(tsdn, new_addr, size, - ALIGNMENT_CEILING(alignment, PAGE), zero, commit, - arena_ind_get(arena)); + return ehooks_default_alloc_impl(tsdn_fetch(), new_addr, size, + ALIGNMENT_CEILING(alignment, PAGE), zero, commit, arena_ind); } bool diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a2cf3e5..589689c 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -255,6 +255,21 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { do_arena_reset_post(ptrs, nptrs, arena_ind); do_arena_destroy(arena_ind_another); + + /* Try arena.create with custom hooks. */ + size_t sz = sizeof(extent_hooks_t *); + extent_hooks_t *default_hooks; + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, + &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); + + /* Default impl; but wrapped as "customized". */ + extent_hooks_t new_hooks = *default_hooks; + extent_hooks_t *hook = &new_hooks; + sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)&hook, sizeof(void *)), 0, + "Unexpected mallctl() failure"); + do_arena_destroy(arena_ind); } TEST_END -- cgit v0.12 From e5062e9fb91e5f531266e5691a5567e7cc8fab5f Mon Sep 17 00:00:00 2001 From: Mingli Yu Date: Tue, 10 Aug 2021 13:02:18 +0000 Subject: Makefile.in: make sure doc generated before install There is a race between the doc generation and the doc installation, so make the install depend on the build for doc. 
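For illustration, a simplified sketch of the dependency chain after this change (condensed from the Makefile.in hunks below; the real rules also create the target directories and loop over $(DOCS_HTML) and $(DOCS_MAN3)):

    # Each install target now depends on the matching build target, so a
    # parallel `make -jN install_doc` can no longer copy the docs before
    # they have been generated.
    install_doc_html: build_doc_html
    	$(INSTALL) -m 644 $(DOCS_HTML) $(DATADIR)/doc/jemalloc$(install_suffix)

    install_doc_man: build_doc_man
    	$(INSTALL) -m 644 $(DOCS_MAN3) $(MANDIR)/man3

    install_doc: install_doc_html install_doc_man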
Signed-off-by: Mingli Yu --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 286f7ea..51276ce 100644 --- a/Makefile.in +++ b/Makefile.in @@ -582,21 +582,21 @@ install_lib: install_lib_static endif install_lib: install_lib_pc -install_doc_html: +install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done -install_doc_man: +install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done -install_doc: build_doc install_doc_html install_doc_man +install_doc: install_doc_html install_doc_man install: install_bin install_include install_lib -- cgit v0.12 From 26140dd24676a06293e105e0ac4e1f1fef04f337 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 1 Sep 2021 10:45:16 -0700 Subject: Reject --enable-prof-libunwind without --enable-prof Prior to the change you could specify --enable-prof-libunwind without --enable-prof which would do effectively nothing. This was confusing as I expected --enable-prof-libunwind to act like --enable-prof, but use libunwind. --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 3e18f4a..5a5887a 100644 --- a/configure.ac +++ b/configure.ac @@ -1281,6 +1281,9 @@ AC_ARG_ENABLE([prof-libunwind], enable_prof_libunwind="0" else enable_prof_libunwind="1" + if test "x$enable_prof" = "x0" ; then + AC_MSG_ERROR([--enable-prof-libunwind should only be used with --enable-prof]) + fi fi ], [enable_prof_libunwind="0"] -- cgit v0.12 From b8b8027f19d089821a19214f56cc9c1202df835d Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 6 Aug 2021 14:53:05 -0700 Subject: Allow PAI to calculate time until deferred work Previously the calculation of sleep time between wakeups was implemented within background_thread. This resulted in some parts of decay and hpa specific logic mixing with background thread implementation. In this change, background thread delegates this calculation to arena and it, in turn, delegates it to PAI. The next step is to implement the actual calculation of time until deferred work in HPA. --- include/jemalloc/internal/arena_externs.h | 8 +- .../jemalloc/internal/background_thread_externs.h | 3 + .../jemalloc/internal/background_thread_inlines.h | 14 -- .../jemalloc/internal/background_thread_structs.h | 3 + include/jemalloc/internal/decay.h | 25 +++ include/jemalloc/internal/pa.h | 2 + include/jemalloc/internal/pai.h | 6 + src/arena.c | 86 ++++++++-- src/background_thread.c | 177 +++++---------------- src/decay.c | 30 +++- src/hpa.c | 7 + src/pa.c | 67 +++++++- src/pac.c | 7 + test/unit/decay.c | 32 ++++ 14 files changed, 298 insertions(+), 169 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 557e49f..02e7c1c 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -8,6 +8,12 @@ #include "jemalloc/internal/pages.h" #include "jemalloc/internal/stats.h" +/* + * When the amount of pages to be purged exceeds this amount, deferred purge + * should happen. 
+ */ +#define ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD UINT64_C(1024) + extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; @@ -16,7 +22,6 @@ extern const char *percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; -extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; @@ -51,6 +56,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index bc49bea..3d1ea6c 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,6 +13,9 @@ extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); +bool background_thread_running(background_thread_info_t* info); +void background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 71b433c..92c5feb 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -45,18 +45,4 @@ background_thread_indefinite_sleep(background_thread_info_t *info) { return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE void -arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread) { - if (!background_thread_enabled() || is_background_thread) { - return; - } - background_thread_info_t *info = - arena_background_thread_info_get(arena); - if (background_thread_indefinite_sleep(info)) { - background_thread_interval_check(tsdn, arena, - &arena->pa_shard.pac.decay_dirty, 0); - } -} - #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index cc14dde..b884b68 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -19,6 +19,9 @@ #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2) #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 +#define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0) +#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1) + typedef enum { background_thread_stopped, background_thread_started, diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 8e51745..cf6a9d2 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -118,6 +118,25 @@ decay_epoch_duration_ns(const decay_t *decay) { return nstime_ns(&decay->interval); } +static inline bool +decay_immediately(const decay_t *decay) { + ssize_t decay_ms = 
decay_ms_read(decay); + return decay_ms == 0; +} + +static inline bool +decay_disabled(const decay_t *decay) { + ssize_t decay_ms = decay_ms_read(decay); + return decay_ms < 0; +} + +/* Returns true if decay is enabled and done gradually. */ +static inline bool +decay_gradually(const decay_t *decay) { + ssize_t decay_ms = decay_ms_read(decay); + return decay_ms > 0; +} + /* * Returns true if the passed in decay time setting is valid. * < -1 : invalid @@ -144,6 +163,12 @@ bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); */ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); +/* + * Compute how many of 'npages_new' pages we would need to purge in 'time'. + */ +uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, + size_t npages_new); + /* Returns true if the epoch advanced and there are pages to purge. */ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 2e5b9ef..b2fed59 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -200,6 +200,8 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed); void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); /******************************************************************************/ /* diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 4d3a9e0..7179fd3 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -24,6 +24,7 @@ struct pai_s { /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); + uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; /* @@ -64,6 +65,11 @@ pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { self->dalloc_batch(tsdn, self, list); } +static inline uint64_t +pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return self->time_until_deferred_work(tsdn, self); +} + /* * An implementation of batch allocation that simply calls alloc once for * each item in the list. 
diff --git a/src/arena.c b/src/arena.c index a495ef6..3dd7782 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,13 +38,6 @@ static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; pa_central_t arena_pa_central_global; -const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP -}; - div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; @@ -65,6 +58,9 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +static void +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new); /******************************************************************************/ @@ -189,6 +185,20 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } } +static void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, + bool is_background_thread) { + if (!background_thread_enabled() || is_background_thread) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + arena_maybe_do_deferred_work(tsdn, arena, + &arena->pa_shard.pac.decay_dirty, 0); + } +} + void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -420,8 +430,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, - npages_new); + arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new); } return false; @@ -462,6 +471,65 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +static void +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new) { + background_thread_info_t *info = arena_background_thread_info_get( + arena); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Background thread may hold the mutex for a long period of + * time. We'd like to avoid the variance on application + * threads. So keep this non-blocking, and leave the work to a + * future epoch. 
+ */ + return; + } + if (!background_thread_running(info)) { + goto label_done; + } + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + goto label_done; + } + if (!decay_gradually(decay)) { + goto label_done_unlock2; + } + + nstime_t diff; + nstime_init(&diff, background_thread_wakeup_time_get(info)); + if (nstime_compare(&diff, &decay->epoch) <= 0) { + goto label_done_unlock2; + } + nstime_subtract(&diff, &decay->epoch); + + if (npages_new > 0) { + uint64_t npurge_new = decay_npages_purge_in(decay, &diff, + npages_new); + info->npages_to_purge_new += npurge_new; + } + + bool should_signal; + if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) { + should_signal = true; + } else if (unlikely(background_thread_indefinite_sleep(info)) && + (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || + info->npages_to_purge_new > 0)) { + should_signal = true; + } else { + should_signal = false; + } + + if (should_signal) { + info->npages_to_purge_new = 0; + background_thread_wakeup_early(info, &diff); + } +label_done_unlock2: + malloc_mutex_unlock(tsdn, &decay->mtx); +label_done: + malloc_mutex_unlock(tsdn, &info->mtx); +} + /* Called from background threads. */ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { diff --git a/src/background_thread.c b/src/background_thread.c index 4951cd1..9e577cb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -60,8 +60,9 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED -void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - decay_t *decay, size_t npages_new) NOT_REACHED +bool background_thread_running(background_thread_info_t *info) NOT_REACHED +void background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED @@ -98,8 +99,6 @@ set_current_thread_affinity(int cpu) { #endif } -/* Threshold for determining when to wake up the background thread. */ -#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024) #define BILLION UINT64_C(1000000000) /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) @@ -173,55 +172,10 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { return false; } -static inline uint64_t -arena_decay_compute_purge_interval(tsdn_t *tsdn, decay_t *decay, - size_t npages) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_MIN_INTERVAL_NS; - } - uint64_t decay_ns = decay_ns_until_purge(decay, npages, - BACKGROUND_THREAD_NPAGES_THRESHOLD); - malloc_mutex_unlock(tsdn, &decay->mtx); - - return decay_ns < BACKGROUND_THREAD_MIN_INTERVAL_NS ? 
- BACKGROUND_THREAD_MIN_INTERVAL_NS : - decay_ns; -} - - -static inline uint64_t -arena_decay_compute_min_purge_interval(tsdn_t *tsdn, arena_t *arena) { - uint64_t dirty, muzzy; - dirty = arena_decay_compute_purge_interval(tsdn, - &arena->pa_shard.pac.decay_dirty, - ecache_npages_get(&arena->pa_shard.pac.ecache_dirty)); - if (dirty == BACKGROUND_THREAD_MIN_INTERVAL_NS) { - return dirty; - } - muzzy = arena_decay_compute_purge_interval(tsdn, - &arena->pa_shard.pac.decay_muzzy, - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy)); - - uint64_t min_so_far = dirty < muzzy ? dirty : muzzy; - if (opt_background_thread_hpa_interval_max_ms >= 0) { - uint64_t hpa_interval = 1000 * 1000 * - (uint64_t)opt_background_thread_hpa_interval_max_ms; - if (hpa_interval < min_so_far) { - if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; - } else { - min_so_far = hpa_interval; - } - } - } - - return min_so_far; -} - static inline void -background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { - uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; +background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, + unsigned ind) { + uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); for (unsigned i = ind; i < narenas; i += max_background_threads) { @@ -230,19 +184,29 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne continue; } arena_do_deferred_work(tsdn, arena); - if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. */ continue; } - uint64_t interval = arena_decay_compute_min_purge_interval(tsdn, - arena); - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); - if (interval != DECAY_UNBOUNDED_TIME_TO_PURGE && - min_interval > interval) { - min_interval = interval; + uint64_t ns_arena_deferred = pa_shard_time_until_deferred_work( + tsdn, &arena->pa_shard); + if (ns_arena_deferred < ns_until_deferred) { + ns_until_deferred = ns_arena_deferred; } } - background_thread_sleep(tsdn, info, min_interval); + + uint64_t sleep_ns; + if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) { + sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP; + } else { + sleep_ns = + (ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS) + ? BACKGROUND_THREAD_MIN_INTERVAL_NS + : ns_until_deferred; + + } + + background_thread_sleep(tsdn, info, sleep_ns); } static bool @@ -609,88 +573,23 @@ background_threads_disable(tsd_t *tsd) { return false; } -/* Check if we need to signal the background thread early. */ +bool +background_thread_running(background_thread_info_t *info) { + return info->state == background_thread_started; +} + void -background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new) { - background_thread_info_t *info = arena_background_thread_info_get( - arena); - if (malloc_mutex_trylock(tsdn, &info->mtx)) { - /* - * Background thread may hold the mutex for a long period of - * time. We'd like to avoid the variance on application - * threads. So keep this non-blocking, and leave the work to a - * future epoch. - */ +background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep) { + /* + * This is an optimization to increase batching. At this point + * we know that background thread wakes up soon, so the time to cache + * the just freed memory is bounded and low. 
+ */ + if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } - - if (info->state != background_thread_started) { - goto label_done; - } - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - goto label_done; - } - - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. */ - goto label_done_unlock2; - } - uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); - assert(decay_interval_ns > 0); - - nstime_t diff; - nstime_init(&diff, background_thread_wakeup_time_get(info)); - if (nstime_compare(&diff, &decay->epoch) <= 0) { - goto label_done_unlock2; - } - nstime_subtract(&diff, &decay->epoch); - if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - goto label_done_unlock2; - } - - if (npages_new > 0) { - size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns); - /* - * Compute how many new pages we would need to purge by the next - * wakeup, which is used to determine if we should signal the - * background thread. - */ - uint64_t npurge_new; - if (n_epoch >= SMOOTHSTEP_NSTEPS) { - npurge_new = npages_new; - } else { - uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; - assert(h_steps_max >= - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npurge_new = npages_new * (h_steps_max - - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npurge_new >>= SMOOTHSTEP_BFP; - } - info->npages_to_purge_new += npurge_new; - } - - bool should_signal; - if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - should_signal = true; - } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || - info->npages_to_purge_new > 0)) { - should_signal = true; - } else { - should_signal = false; - } - - if (should_signal) { - info->npages_to_purge_new = 0; - pthread_cond_signal(&info->cond); - } -label_done_unlock2: - malloc_mutex_unlock(tsdn, &decay->mtx); -label_done: - malloc_mutex_unlock(tsdn, &info->mtx); + pthread_cond_signal(&info->cond); } void diff --git a/src/decay.c b/src/decay.c index fdbd63d..cdb8487 100644 --- a/src/decay.c +++ b/src/decay.c @@ -3,6 +3,13 @@ #include "jemalloc/internal/decay.h" +const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP +}; + /* * Generate a new deadline that is uniformly random within the next epoch after * the current one. 
@@ -147,6 +154,25 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) { return (nstime_compare(&decay->deadline, time) <= 0); } +uint64_t +decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns); + + uint64_t npages_purge; + if (n_epoch >= SMOOTHSTEP_NSTEPS) { + npages_purge = npages_new; + } else { + uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; + assert(h_steps_max >= + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge = npages_new * (h_steps_max - + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge >>= SMOOTHSTEP_BFP; + } + return npages_purge; +} + bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t npages_current) { @@ -214,9 +240,7 @@ decay_npurge_after_interval(decay_t *decay, size_t interval) { uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, uint64_t npages_threshold) { - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. */ + if (!decay_gradually(decay)) { return DECAY_UNBOUNDED_TIME_TO_PURGE; } uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); diff --git a/src/hpa.c b/src/hpa.c index 6b7517d..d45a3bd 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -19,6 +19,7 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); +static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool hpa_supported() { @@ -218,6 +219,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc_batch = &hpa_dalloc_batch; + shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work; hpa_do_consistency_checks(shard); @@ -850,6 +852,11 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { hpa_dalloc_batch(tsdn, self, &dalloc_list); } +static uint64_t +hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return opt_background_thread_hpa_interval_max_ms; +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); diff --git a/src/pa.c b/src/pa.c index 93da02e..c5b8daa 100644 --- a/src/pa.c +++ b/src/pa.c @@ -96,6 +96,11 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { } } +static bool +pa_shard_uses_hpa(pa_shard_t *shard) { + return atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED); +} + void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); @@ -118,7 +123,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, WITNESS_RANK_CORE, 0); edata_t *edata = NULL; - if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero); } @@ -226,7 +231,7 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) { - if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, deferral_allowed); } @@ -234,7 +239,63 @@ pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - if 
(atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); } } + +static inline uint64_t +pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_DEFERRED_MIN; + } + uint64_t result = decay_ns_until_purge(decay, npages, + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + + malloc_mutex_unlock(tsdn, &decay->mtx); + return result; +} + +/* + * Get time until next deferred work ought to happen. If there are multiple + * things that have been deferred, this function calculates the time until + * the soonest of those things. + */ +uint64_t +pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { + uint64_t time; + time = pa_shard_ns_until_purge(tsdn, + &shard->pac.decay_dirty, + ecache_npages_get(&shard->pac.ecache_dirty)); + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + + uint64_t muzzy = pa_shard_ns_until_purge(tsdn, + &shard->pac.decay_muzzy, + ecache_npages_get(&shard->pac.ecache_muzzy)); + if (muzzy < time) { + time = muzzy; + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + } + + uint64_t pac = pai_time_until_deferred_work(tsdn, &shard->pac.pai); + if (pac < time) { + time = pac; + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + } + + if (pa_shard_uses_hpa(shard)) { + uint64_t hpa = + pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); + if (hpa < time) { + time = hpa; + } + } + return time; +} diff --git a/src/pac.c b/src/pac.c index 0737e68..c611d91 100644 --- a/src/pac.c +++ b/src/pac.c @@ -10,6 +10,7 @@ static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); +static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static ehooks_t * pac_ehooks_get(pac_t *pac) { @@ -96,6 +97,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; pac->pai.dalloc_batch = &pai_dalloc_batch_default; + pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; return false; } @@ -196,6 +198,11 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); } +static uint64_t +pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return BACKGROUND_THREAD_DEFERRED_MAX; +} + bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) { diff --git a/test/unit/decay.c b/test/unit/decay.c index 72484c8..6772219 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -36,6 +36,37 @@ TEST_BEGIN(test_decay_ms_valid) { } TEST_END +TEST_BEGIN(test_decay_npages_purge_in) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + nstime_t decay_nstime; + nstime_init(&decay_nstime, decay_ms * 1000 * 1000); + expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms), + "Failed to initialize decay"); + + const size_t new_pages = 100; + + nstime_t time; + nstime_copy(&time, &decay_nstime); + expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), + new_pages, "Not all pages are expected to decay in decay_ms"); + + nstime_init(&time, 0); + 
expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), 0, + "More than zero pages are expected to instantly decay"); + + nstime_copy(&time, &decay_nstime); + nstime_idivide(&time, 2); + expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), + new_pages / 2, "Not half of pages decay in half the decay period"); +} +TEST_END + TEST_BEGIN(test_decay_maybe_advance_epoch) { decay_t decay; memset(&decay, 0, sizeof(decay)); @@ -244,6 +275,7 @@ main(void) { return test( test_decay_init, test_decay_ms_valid, + test_decay_npages_purge_in, test_decay_maybe_advance_epoch, test_decay_empty, test_decay, -- cgit v0.12 From 97da57c13afec4690a38adf7c94bf97ccd5bfdff Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 18 Aug 2021 12:22:43 -0700 Subject: HPA: Add min_purge_interval_ms option This rate limiting option is required to avoid purging too often. --- include/jemalloc/internal/hpa_opts.h | 9 ++++++++- src/ctl.c | 4 ++++ src/jemalloc.c | 10 ++++++++-- src/stats.c | 1 + 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 2548f44..ee84fea 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -43,6 +43,11 @@ struct hpa_shard_opts_s { * actually get hugified. */ uint64_t hugify_delay_ms; + + /* + * Minimum amount of time between purges. + */ + uint64_t min_purge_interval_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -61,7 +66,9 @@ struct hpa_shard_opts_s { */ \ false, \ /* hugify_delay_ms */ \ - 10 * 1000 \ + 10 * 1000, \ + /* min_purge_interval_ms */ \ + 5 * 1000 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 253341a..9647478 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -97,6 +97,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) +CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -408,6 +409,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2116,6 +2118,8 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, + uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/jemalloc.c b/src/jemalloc.c index d5e886e..66e3685 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1447,9 +1447,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_SIZE_T( + CONF_HANDLE_UINT64_T( opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", - 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + false); + + CONF_HANDLE_UINT64_T( + opt_hpa_opts.min_purge_interval_ms, + "hpa_min_purge_interval_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); if (CONF_MATCH("hpa_dirty_mult")) { if 
(CONF_MATCH_VALUE("-1")) { diff --git a/src/stats.c b/src/stats.c index 3a2806e..25ee235 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1472,6 +1472,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") + OPT_WRITE_UINT64("hpa_min_purge_interval_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* -- cgit v0.12 From 8229cc77c51109737774bcd053adab001de21e0e Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 18 Aug 2021 19:24:37 -0700 Subject: Wake up background threads on demand

This change allows every allocator conforming to PAI to communicate that it deferred some work for the future. Without it, if a background thread goes into indefinite sleep, there is no way to notify it about upcoming deferred work.
--- include/jemalloc/internal/arena_externs.h | 2 +- .../jemalloc/internal/background_thread_externs.h | 2 +- .../jemalloc/internal/background_thread_structs.h | 2 +- include/jemalloc/internal/hpa.h | 5 + include/jemalloc/internal/pa.h | 9 +- include/jemalloc/internal/pai.h | 50 +++++---- src/arena.c | 113 +++++++++++-------- src/background_thread.c | 20 +++- src/decay.c | 2 +- src/hpa.c | 122 +++++++++++++++++---- src/large.c | 26 +++-- src/pa.c | 25 +++-- src/pac.c | 28 +++-- src/pai.c | 14 ++- src/sec.c | 49 ++++++--- test/unit/decay.c | 2 +- test/unit/hpa.c | 46 +++++--- test/unit/hpa_background_thread.c | 18 +++ test/unit/hpa_background_thread.sh | 2 +- test/unit/pa.c | 7 +- test/unit/sec.c | 115 ++++++++++++------- 21 files changed, 445 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 02e7c1c..b9231c5 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -42,7 +42,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); -void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 3d1ea6c..a2d79ad 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,7 +13,7 @@ extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); -bool background_thread_running(background_thread_info_t* info); +bool background_thread_is_started(background_thread_info_t* info); void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index b884b68..83a9198 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -20,7
+20,7 @@ #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 #define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0) -#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1) +#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_MAX typedef enum { background_thread_stopped, diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 46878a8..f356285 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -136,6 +136,11 @@ struct hpa_shard_s { * stats. */ hpa_shard_nonderived_stats_t stats; + + /* + * Last time we performed purge on this shard. + */ + nstime_t last_purge; }; /* diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b2fed59..9783413 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -167,16 +167,17 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool zero); + size_t alignment, bool slab, szind_t szind, bool zero, + bool *deferred_work_generated); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero); + size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. */ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool *generated_dirty); + size_t new_size, szind_t szind, bool *deferred_work_generated); /* * Frees the given edata back to the pa. Sets *generated_dirty if we produced * new dirty pages (well, we alwyas set it for now; but this need not be the @@ -185,7 +186,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, * consistent with the shrink pathway and our error codes here). */ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool *generated_dirty); + bool *deferred_work_generated); bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 7179fd3..ca5f616 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,7 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer * than requested, in case of OOM). The list should already be @@ -15,15 +15,18 @@ struct pai_s { * the results are not necessarily zeroed. 
*/ size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, + bool *deferred_work_generated); bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); - void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); + void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; @@ -33,36 +36,43 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { - return self->alloc(tsdn, self, size, alignment, zero); +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { + return self->alloc(tsdn, self, size, alignment, zero, + deferred_work_generated); } static inline size_t pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results) { - return self->alloc_batch(tsdn, self, size, nallocs, results); + edata_list_active_t *results, bool *deferred_work_generated) { + return self->alloc_batch(tsdn, self, size, nallocs, results, + deferred_work_generated); } static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { - return self->expand(tsdn, self, edata, old_size, new_size, zero); + size_t new_size, bool zero, bool *deferred_work_generated) { + return self->expand(tsdn, self, edata, old_size, new_size, zero, + deferred_work_generated); } static inline bool pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { - return self->shrink(tsdn, self, edata, old_size, new_size); + size_t new_size, bool *deferred_work_generated) { + return self->shrink(tsdn, self, edata, old_size, new_size, + deferred_work_generated); } static inline void -pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - self->dalloc(tsdn, self, edata); +pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { + self->dalloc(tsdn, self, edata, deferred_work_generated); } static inline void -pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { - self->dalloc_batch(tsdn, self, list); +pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { + self->dalloc_batch(tsdn, self, list, deferred_work_generated); } static inline uint64_t @@ -75,9 +85,9 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * each item in the list. */ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); /* Ditto, for dalloc. 
*/ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/src/arena.c b/src/arena.c index 3dd7782..c720bcb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,15 +199,17 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, } } -void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { +/* + * React to deferred work generated by a PAI function. + */ +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (arena_decay_ms_get(arena, extent_state_dirty) == 0) { + if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) { arena_decay_dirty(tsdn, arena, false, true); - } else { - arena_background_thread_inactivity_check(tsdn, arena, false); } + arena_background_thread_inactivity_check(tsdn, arena, false); } static void * @@ -316,11 +318,14 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + bool deferred_work_generated; szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero); + /* slab */ false, szind, zero, &deferred_work_generated); + + assert(deferred_work_generated == false); if (edata != NULL) { if (config_stats) { @@ -471,6 +476,45 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +static bool +arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + background_thread_info_t *info, nstime_t *remaining_sleep, + size_t npages_new) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + return false; + } + + if (!decay_gradually(decay)) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + + nstime_init(remaining_sleep, background_thread_wakeup_time_get(info)); + if (nstime_compare(remaining_sleep, &decay->epoch) <= 0) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + nstime_subtract(remaining_sleep, &decay->epoch); + if (npages_new > 0) { + uint64_t npurge_new = decay_npages_purge_in(decay, + remaining_sleep, npages_new); + info->npages_to_purge_new += npurge_new; + } + malloc_mutex_unlock(tsdn, &decay->mtx); + return info->npages_to_purge_new > + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; +} + +/* + * Check if deferred work needs to be done sooner than planned. + * For decay we might want to wake up earlier because of an influx of dirty + * pages. Rather than waiting for previously estimated time, we proactively + * purge those pages. + * If background thread sleeps indefinitely, always wake up because some + * deferred work has been generated. 
+ */ static void arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new) { @@ -485,47 +529,18 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, */ return; } - if (!background_thread_running(info)) { + if (!background_thread_is_started(info)) { goto label_done; } - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - goto label_done; - } - if (!decay_gradually(decay)) { - goto label_done_unlock2; - } - - nstime_t diff; - nstime_init(&diff, background_thread_wakeup_time_get(info)); - if (nstime_compare(&diff, &decay->epoch) <= 0) { - goto label_done_unlock2; - } - nstime_subtract(&diff, &decay->epoch); - if (npages_new > 0) { - uint64_t npurge_new = decay_npages_purge_in(decay, &diff, - npages_new); - info->npages_to_purge_new += npurge_new; - } - - bool should_signal; - if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) { - should_signal = true; - } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || - info->npages_to_purge_new > 0)) { - should_signal = true; - } else { - should_signal = false; - } - - if (should_signal) { + nstime_t remaining_sleep; + if (background_thread_indefinite_sleep(info)) { + background_thread_wakeup_early(info, NULL); + } else if (arena_should_decay_early(tsdn, arena, decay, info, + &remaining_sleep, npages_new)) { info->npages_to_purge_new = 0; - background_thread_wakeup_early(info, &diff); + background_thread_wakeup_early(info, &remaining_sleep); } -label_done_unlock2: - malloc_mutex_unlock(tsdn, &decay->mtx); label_done: malloc_mutex_unlock(tsdn, &info->mtx); } @@ -539,10 +554,10 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - bool generated_dirty; - pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + bool deferred_work_generated; + pa_dalloc(tsdn, &arena->pa_shard, slab, &deferred_work_generated); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } } @@ -803,11 +818,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { + bool deferred_work_generated; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, /* zero */ false); + PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, + &deferred_work_generated); + + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); + } if (slab == NULL) { return NULL; diff --git a/src/background_thread.c b/src/background_thread.c index 9e577cb..69ef983 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -119,7 +119,8 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, int ret; if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - assert(background_thread_indefinite_sleep(info)); + background_thread_wakeup_time_set(tsdn, info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); ret = pthread_cond_wait(&info->cond, &info->mtx.lock); assert(ret == 0); } else { @@ -144,8 +145,6 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, assert(!background_thread_indefinite_sleep(info)); ret = 
pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); assert(ret == ETIMEDOUT || ret == 0); - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); } if (config_stats) { gettimeofday(&tv, NULL); @@ -177,13 +176,21 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); + bool slept_indefinitely = background_thread_indefinite_sleep(info); for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; } - arena_do_deferred_work(tsdn, arena); + /* + * If thread was woken up from the indefinite sleep, don't + * do the work instantly, but rather check when the deferred + * work that caused this thread to wake up is scheduled for. + */ + if (!slept_indefinitely) { + arena_do_deferred_work(tsdn, arena); + } if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. */ continue; @@ -574,7 +581,7 @@ background_threads_disable(tsd_t *tsd) { } bool -background_thread_running(background_thread_info_t *info) { +background_thread_is_started(background_thread_info_t *info) { return info->state == background_thread_started; } @@ -586,7 +593,8 @@ background_thread_wakeup_early(background_thread_info_t *info, * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. */ - if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (remaining_sleep && nstime_ns(remaining_sleep) < + BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } pthread_cond_signal(&info->cond); diff --git a/src/decay.c b/src/decay.c index cdb8487..d801b2b 100644 --- a/src/decay.c +++ b/src/decay.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/decay.h" -const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { +static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, SMOOTHSTEP diff --git a/src/hpa.c b/src/hpa.c index d45a3bd..d7422a3 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,16 +9,17 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool @@ -366,6 +367,13 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, } } +static bool +hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hpdata_t *to_hugify = 
psset_pick_hugify(&shard->psset); + return to_hugify != NULL || hpa_should_purge(tsdn, shard); +} + /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -429,6 +437,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; + shard->central->hooks.curtime(&shard->last_purge); if (dehugify) { shard->stats.ndehugifies++; } @@ -615,7 +624,8 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, static size_t hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom, size_t nallocs, edata_list_active_t *results) { + bool *oom, size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { malloc_mutex_lock(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { @@ -628,18 +638,20 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); + *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } static size_t hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - size_t nallocs, edata_list_active_t *results) { + size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { assert(size <= shard->opts.slab_max_alloc); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs, results); + nallocs, results, deferred_work_generated); if (nsuccess == nallocs || oom) { return nsuccess; @@ -655,7 +667,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * in between when we dropped the main mutex and grabbed the grow mutex. */ nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results); + nallocs - nsuccess, results, deferred_work_generated); if (nsuccess == nallocs || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -683,7 +695,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, malloc_mutex_unlock(tsdn, &shard->mtx); nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results); + nallocs - nsuccess, results, deferred_work_generated); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. 
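The batch allocation path above threads the new deferred_work_generated out-parameter through every call. The convention the patch relies on is easy to state in isolation: every callee writes the flag on every return path, and batch wrappers OR per-item flags together instead of overwriting them. A minimal standalone sketch of that shape, with hypothetical opaque types:

#include <stdbool.h>
#include <stddef.h>

typedef struct block block_t;            /* opaque allocation handle */

/* Every allocator entry point reports deferred work via an out-parameter. */
typedef block_t *(*alloc_fn_t)(void *self, size_t size,
    bool *deferred_work_generated);

/* Batch wrapper: accumulate the flag across the individual operations. */
size_t
alloc_batch(void *self, alloc_fn_t alloc, size_t size, size_t nallocs,
    block_t **results, bool *deferred_work_generated) {
    *deferred_work_generated = false;
    for (size_t i = 0; i < nallocs; i++) {
        bool deferred_by_this_alloc = false;
        results[i] = alloc(self, size, &deferred_by_this_alloc);
        *deferred_work_generated |= deferred_by_this_alloc;
        if (results[i] == NULL) {
            return i;                    /* partial success */
        }
    }
    return nallocs;
}

The caller then reacts once per batch (for example by notifying the arena that deferred work exists) rather than once per item.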
@@ -704,7 +716,7 @@ hpa_from_pai(pai_t *self) { static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results) { + edata_list_active_t *results, bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -716,7 +728,7 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, - results); + results, deferred_work_generated); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -737,7 +749,8 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } static edata_t * -hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -753,23 +766,25 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { edata_list_active_t results; edata_list_active_init(&results); size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results); + &results, deferred_work_generated); assert(nallocs == 0 || nallocs == 1); edata_t *edata = edata_list_active_first(&results); return edata; } static bool -hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero) { +hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero, bool *deferred_work_generated) { /* Expand not yet supported. */ + *deferred_work_generated = false; return true; } static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size) { + size_t old_size, size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. */ + *deferred_work_generated = false; return true; } @@ -825,7 +840,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { } static void -hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { +hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { hpa_shard_t *shard = hpa_from_pai(self); edata_t *edata; @@ -840,21 +856,83 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { hpa_dalloc_locked(tsdn, shard, edata); } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); + *deferred_work_generated = + hpa_shard_has_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { /* Just a dalloc_batch of size 1; this lets us share logic. */ edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); - hpa_dalloc_batch(tsdn, self, &dalloc_list); + hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated); } +/* + * Calculate time until either purging or hugification ought to happen. + * Called by background threads. 
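The hpa_time_until_deferred_work() implementation that follows converts "time since hugification became allowed" and "time since the last purge pass" into a sleep interval for the background thread. Stripped of locking and shard state, the arithmetic reduces to the sketch below; the sentinel values and parameters are hypothetical, not jemalloc's.

#include <stdbool.h>
#include <stdint.h>

#define DEFERRED_WORK_SOON ((uint64_t)0)        /* wake as soon as possible */
#define DEFERRED_WORK_NONE UINT64_MAX           /* nothing pending */

/* Nanoseconds left before an action with a delay_ms cooldown is due. */
uint64_t
ns_until_due(uint64_t elapsed_ms, uint64_t delay_ms) {
    if (elapsed_ms >= delay_ms) {
        return DEFERRED_WORK_SOON;              /* already overdue */
    }
    return (delay_ms - elapsed_ms) * 1000 * 1000;
}

/* Earliest of the two deferred actions: hugification and purging. */
uint64_t
time_until_deferred_work(bool can_hugify, uint64_t ms_since_hugify_allowed,
    uint64_t hugify_delay_ms, bool should_purge, uint64_t ms_since_last_purge,
    uint64_t min_purge_interval_ms) {
    uint64_t time_ns = DEFERRED_WORK_NONE;
    if (can_hugify) {
        uint64_t t = ns_until_due(ms_since_hugify_allowed, hugify_delay_ms);
        if (t < time_ns) {
            time_ns = t;
        }
    }
    if (should_purge) {
        uint64_t t = ns_until_due(ms_since_last_purge, min_purge_interval_ms);
        if (t < time_ns) {
            time_ns = t;
        }
    }
    return time_ns;
}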
+ */ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { - return opt_background_thread_hpa_interval_max_ms; + hpa_shard_t *shard = hpa_from_pai(self); + uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; + + malloc_mutex_lock(tsdn, &shard->mtx); + + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify != NULL) { + nstime_t time_hugify_allowed = + hpdata_time_hugify_allowed(to_hugify); + nstime_t nstime; + shard->central->hooks.curtime(&nstime); + nstime_subtract(&nstime, &time_hugify_allowed); + uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); + /* + * If not enough time has passed since hugification was allowed, + * sleep for the rest. + */ + if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { + time_ns = shard->opts.hugify_delay_ms - since_hugify_allowed_ms; + time_ns *= 1000 * 1000; + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + return BACKGROUND_THREAD_DEFERRED_MIN; + } + } + + if (hpa_should_purge(tsdn, shard)) { + /* + * If we haven't purged before, no need to check interval + * between purges. Simply purge as soon as possible. + */ + if (shard->stats.npurge_passes == 0) { + malloc_mutex_unlock(tsdn, &shard->mtx); + return BACKGROUND_THREAD_DEFERRED_MIN; + } + nstime_t nstime; + shard->central->hooks.curtime(&nstime); + nstime_subtract(&nstime, &shard->last_purge); + uint64_t since_last_purge_ms = nstime_msec(&nstime); + + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + uint64_t until_purge_ns; + until_purge_ns = shard->opts.min_purge_interval_ms - + since_last_purge_ms; + until_purge_ns *= 1000 * 1000; + + if (until_purge_ns < time_ns) { + time_ns = until_purge_ns; + } + } else { + time_ns = BACKGROUND_THREAD_DEFERRED_MIN; + } + } + malloc_mutex_unlock(tsdn, &shard->mtx); + return time_ns; } void diff --git a/src/large.c b/src/large.c index bd29e5c..6dbb3d9 100644 --- a/src/large.c +++ b/src/large.c @@ -64,14 +64,15 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - bool generated_dirty; + bool deferred_work_generated; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, - usize + sz_large_pad, sz_size2index(usize), &generated_dirty); + usize + sz_large_pad, sz_size2index(usize), + &deferred_work_generated); if (err) { return true; } - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } arena_extent_ralloc_large_shrink(tsdn, arena, edata, old_usize); @@ -88,8 +89,15 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, size_t new_size = usize + sz_large_pad; szind_t szind = sz_size2index(usize); + + bool deferred_work_generated; bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, zero); + szind, zero, &deferred_work_generated); + + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); + } + if (err) { return true; } @@ -241,10 +249,10 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - bool generated_dirty; - pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty); - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + bool deferred_work_generated; + pa_dalloc(tsdn, &arena->pa_shard, edata, &deferred_work_generated); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } } diff --git a/src/pa.c b/src/pa.c index c5b8daa..a29e10b 100644 --- 
a/src/pa.c +++ b/src/pa.c @@ -118,21 +118,23 @@ pa_get_pai(pa_shard_t *shard, edata_t *edata) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool zero) { + bool slab, szind_t szind, bool zero, bool *deferred_work_generated) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_t *edata = NULL; + *deferred_work_generated = false; if (pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero); + zero, deferred_work_generated); } /* * Fall back to the PAC if the HPA is off or couldn't serve the given * allocation request. */ if (edata == NULL) { - edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); + edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, + deferred_work_generated); } if (edata != NULL) { @@ -152,7 +154,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero) { + size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -161,7 +163,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero); + bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero, + deferred_work_generated); if (error) { return true; } @@ -174,20 +177,19 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool *generated_dirty) { + size_t new_size, szind_t szind, bool *deferred_work_generated) { assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); size_t shrink_amount = old_size - new_size; - *generated_dirty = false; pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_shrink(tsdn, pai, edata, old_size, new_size); + bool error = pai_shrink(tsdn, pai, edata, old_size, new_size, + deferred_work_generated); if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); @@ -196,7 +198,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool *generated_dirty) { + bool *deferred_work_generated) { emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (edata_slab_get(edata)) { emap_deregister_interior(tsdn, shard->emap, edata); @@ -206,8 +208,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); pai_t *pai = pa_get_pai(shard, edata); - pai_dalloc(tsdn, pai, edata); - *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); + pai_dalloc(tsdn, pai, edata, deferred_work_generated); } bool diff --git a/src/pac.c b/src/pac.c index c611d91..2221c8d 100644 --- a/src/pac.c +++ b/src/pac.c @@ -4,12 +4,13 @@ #include "jemalloc/internal/pac.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, 
bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static ehooks_t * @@ -109,9 +110,11 @@ pac_may_have_muzzy(pac_t *pac) { static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero) { + bool zero, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; + *deferred_work_generated = false; + ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, NULL, size, alignment, zero); @@ -133,10 +136,12 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { + size_t new_size, bool zero, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); + *deferred_work_generated = false; + size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -171,12 +176,13 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { + size_t new_size, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; - ehooks_t *ehooks = pac_ehooks_get(pac); + size_t shrink_amount = old_size - new_size; + *deferred_work_generated = false; if (ehooks_split_will_fail(ehooks)) { return true; @@ -188,14 +194,18 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); + *deferred_work_generated = true; return false; } static void -pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); + /* Purging of deallocated pages is deferred */ + *deferred_work_generated = true; } static uint64_t diff --git a/src/pai.c b/src/pai.c index bd6966c..e863a9b 100644 --- a/src/pai.c +++ b/src/pai.c @@ -2,11 +2,13 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" size_t -pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results) { +pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results, bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { + bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false); + /* zero */ false, &deferred_by_alloc); + *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; } @@ -17,10 +19,12 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list) { + edata_list_active_t *list, bool 
*deferred_work_generated) { edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { + bool deferred_by_dalloc = false; edata_list_active_remove(list, edata); - pai_dalloc(tsdn, self, edata); + pai_dalloc(tsdn, self, edata, &deferred_by_dalloc); + *deferred_work_generated |= deferred_by_dalloc; } } diff --git a/src/sec.c b/src/sec.c index 4175346..c6f611f 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,12 +4,13 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static void sec_bin_init(sec_bin_t *bin) { @@ -147,7 +148,9 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { } malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); + bool deferred_work_generated; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); } static edata_t * @@ -175,8 +178,9 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); + bool deferred_work_generated; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result); + 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -213,14 +217,17 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static edata_t * -sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); sec_t *sec = (sec_t *)self; + *deferred_work_generated = false; if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { - return pai_alloc(tsdn, sec->fallback, size, alignment, zero); + return pai_alloc(tsdn, sec->fallback, size, alignment, zero, + deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -243,7 +250,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero); + zero, deferred_work_generated); } } return edata; @@ -251,16 +258,18 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { + size_t new_size, bool zero, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; - return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero); + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, + deferred_work_generated); } static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { + size_t new_size, bool 
*deferred_work_generated) { sec_t *sec = (sec_t *)self; - return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size); + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, + deferred_work_generated); } static void @@ -281,7 +290,9 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * we're disabling the HPA or resetting the arena, both of which are * rare pathways. */ - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); + bool deferred_work_generated; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); } static void @@ -317,20 +328,24 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static void -sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; if (sec->opts.nshards == 0 || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata); + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { + *deferred_work_generated = false; sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata); + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); } } diff --git a/test/unit/decay.c b/test/unit/decay.c index 6772219..bdb6d0a 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_decay_npages_purge_in) { expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms), "Failed to initialize decay"); - const size_t new_pages = 100; + size_t new_pages = 100; nstime_t time; nstime_copy(&time, &decay_nstime); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 2d4fa9b..dc3acc0 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -79,9 +79,12 @@ TEST_BEGIN(test_alloc_max) { edata_t *edata; /* Small max */ - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false); + bool deferred_work_generated; + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -166,6 +169,8 @@ TEST_BEGIN(test_stress) { mem_tree_t tree; mem_tree_new(&tree); + bool deferred_work_generated; + for (size_t i = 0; i < 100 * 1000; i++) { size_t operation = prng_range_zu(&prng_state, 2); if (operation == 0) { @@ -183,7 +188,8 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false); + npages * PAGE, PAGE, false, + &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); live_edatas[nlive_edatas] = edata; @@ -199,7 +205,8 @@ TEST_BEGIN(test_stress) { live_edatas[victim] = live_edatas[nlive_edatas - 1]; nlive_edatas--; node_remove(&tree, to_free); - pai_dalloc(tsdn, &shard->pai, to_free); + pai_dalloc(tsdn, &shard->pai, to_free, + &deferred_work_generated); } } @@ -218,7 +225,8 @@ TEST_BEGIN(test_stress) { for (size_t i = 0; i < nlive_edatas; i++) { edata_t *to_free = live_edatas[i]; node_remove(&tree, to_free); - 
pai_dalloc(tsdn, &shard->pai, to_free); + pai_dalloc(tsdn, &shard->pai, to_free, + &deferred_work_generated); } hpa_shard_destroy(tsdn, shard); @@ -244,6 +252,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated; + enum {NALLOCS = 8}; edata_t *allocs[NALLOCS]; @@ -253,13 +263,13 @@ TEST_BEGIN(test_alloc_dalloc_batch) { */ for (size_t i = 0; i < NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; edata_list_active_init(&allocs_list); size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list); + &allocs_list, &deferred_work_generated); expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { allocs[i] = edata_list_active_first(&allocs_list); @@ -279,15 +289,17 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { edata_list_active_append(&allocs_list, allocs[i]); } - pai_dalloc_batch(tsdn, &shard->pai, &allocs_list); + pai_dalloc_batch(tsdn, &shard->pai, &allocs_list, + &deferred_work_generated); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc(tsdn, &shard->pai, allocs[i]); + pai_dalloc(tsdn, &shard->pai, allocs[i], + &deferred_work_generated); } /* Reallocate (individually), and ensure reuse and contiguity. */ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -355,11 +367,14 @@ TEST_BEGIN(test_defer_time) { hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated; + nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -374,7 +389,8 @@ TEST_BEGIN(test_defer_time) { /* Purge. Recall that dirty_mult is .25. */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i]); + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -391,14 +407,16 @@ TEST_BEGIN(test_defer_time) { * be marked for pending hugify. */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* * We would be ineligible for hugification, had we not already met the * threshold before dipping below it. */ - pai_dalloc(tsdn, &shard->pai, edatas[0]); + pai_dalloc(tsdn, &shard->pai, edatas[0], + &deferred_work_generated); /* Wait for the threshold again. 
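test_defer_time above never sleeps; it advances a fake clock (defer_curtime) by hand and lets the shard read time only through its curtime hook. The standalone sketch below shows the same dependency-injection pattern with hypothetical names: the component consults an injected clock, so a test can move time forward deterministically.

#include <stdint.h>
#include <stdio.h>

/* A clock hook lets tests substitute a fake time source. */
typedef uint64_t (*curtime_hook_t)(void);

static uint64_t fake_now_ns;            /* test-controlled clock */

static uint64_t
fake_curtime(void) {
    return fake_now_ns;
}

/* Component under test reads time only through the hook it was given. */
typedef struct {
    curtime_hook_t curtime;
    uint64_t       last_purge_ns;
    uint64_t       min_purge_interval_ns;
} purger_t;

static int
purger_should_purge(purger_t *p) {
    return p->curtime() - p->last_purge_ns >= p->min_purge_interval_ns;
}

int
main(void) {
    purger_t p = {fake_curtime, 0, 2000000000ull};  /* 2s interval */
    fake_now_ns = 1000000000ull;                    /* +1s: too early */
    printf("%d\n", purger_should_purge(&p));        /* prints 0 */
    fake_now_ns = 3000000000ull;                    /* +3s: due */
    printf("%d\n", purger_should_purge(&p));        /* prints 1 */
    return 0;
}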
*/ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 1907a6d..c468683 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -66,6 +66,23 @@ set_background_thread_enabled(bool enabled) { } static void +wait_until_thread_is_enabled(unsigned arena_id) { + tsd_t* tsd = tsd_fetch(); + + bool sleeping = false; + int iterations = 0; + do { + background_thread_info_t *info = + background_thread_info_get(arena_id); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + sleeping = background_thread_indefinite_sleep(info); + assert_d_lt(iterations, (int)1e6, + "Waiting for a thread to start for too long"); + } while (!sleeping); +} + +static void expect_purging(unsigned arena_ind, bool expect_deferred) { size_t empty_ndirty; @@ -132,6 +149,7 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { expect_purging(arena_ind, false); set_background_thread_enabled(true); + wait_until_thread_is_enabled(arena_ind); expect_purging(arena_ind, true); } TEST_END diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 811da8b..65a56a0 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/pa.c b/test/unit/pa.c index 4206e85..4d3ad5e 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -87,12 +87,13 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { + bool deferred_work_generated; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, - PAGE, /* slab */ false, /* szind */ 0, /* zero */ false); + PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, + &deferred_work_generated); assert_ptr_not_null(edata, ""); - bool generated_dirty; pa_dalloc(TSDN_NULL, &test_data->shard, edata, - &generated_dirty); + &deferred_work_generated); malloc_mutex_lock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); pac_decay_all(TSDN_NULL, &test_data->shard.pac, diff --git a/test/unit/sec.c b/test/unit/sec.c index 01455c8..82b0c9d 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,8 +50,9 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero) { + size_t alignment, bool zero, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; if (ta->alloc_fail) { return NULL; } @@ -70,8 +71,10 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, static inline size_t pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results) { + size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; if (ta->alloc_fail) { return 0; } @@ -92,31 +95,37 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, static bool pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero) { + size_t old_size, 
size_t new_size, bool zero, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->expand_count++; return ta->expand_return_value; } static bool pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size) { + size_t old_size, size_t new_size, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->shrink_count++; return ta->shrink_return_value; } static void -pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->dalloc_count++; free(edata); } static void pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list) { + edata_list_active_t *list, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { @@ -168,14 +177,15 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -189,10 +199,12 @@ TEST_BEGIN(test_reuse) { * separation works correctly. 
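test_reuse frees the one-page and two-page extents and then checks that reallocation returns exactly the cached extents, i.e. that the per-size-class bins stay separate. A toy, fixed-capacity model of that per-size-class LIFO caching (all names, sizes, and capacities hypothetical):

#include <stddef.h>
#include <stdio.h>

#define NSIZE_CLASSES 2
#define BIN_CAPACITY  16

/* One LIFO stack of cached pointers per size class. */
typedef struct {
    void  *slots[NSIZE_CLASSES][BIN_CAPACITY];
    size_t ncached[NSIZE_CLASSES];
} size_class_cache_t;

static int
cache_put(size_class_cache_t *c, size_t szind, void *p) {
    if (c->ncached[szind] == BIN_CAPACITY) {
        return 0;                       /* full; caller flushes to fallback */
    }
    c->slots[szind][c->ncached[szind]++] = p;
    return 1;
}

static void *
cache_get(size_class_cache_t *c, size_t szind) {
    if (c->ncached[szind] == 0) {
        return NULL;                    /* miss; caller goes to fallback */
    }
    return c->slots[szind][--c->ncached[szind]];
}

int
main(void) {
    static size_class_cache_t c;        /* zero-initialized */
    int a, b;
    cache_put(&c, 0, &a);
    cache_put(&c, 1, &b);
    /* Different size classes do not interfere with each other. */
    printf("%d %d\n", cache_get(&c, 0) == &a, cache_get(&c, 1) == &b);
    return 0;
}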
*/ for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, one_page[i]); + pai_dalloc(tsdn, &sec.pai, one_page[i], + &deferred_work_generated); } for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, two_page[i]); + pai_dalloc(tsdn, &sec.pai, two_page[i], + &deferred_work_generated); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -204,9 +216,9 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -238,14 +250,16 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } - extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -254,7 +268,7 @@ TEST_BEGIN(test_auto_flush) { "Incorrect number of allocations"); /* Free until the SEC is full, but should not have flushed yet. */ for (int i = 0; i < NALLOCS; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); @@ -267,7 +281,7 @@ TEST_BEGIN(test_auto_flush) { * entirety when it decides to do so, and it has only one bin active * right now. */ - pai_dalloc(tsdn, &sec.pai, extra_alloc); + pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, @@ -291,16 +305,17 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. */ for (int i = 0; i < NALLOCS - 1; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; @@ -326,7 +341,8 @@ do_disable_flush_test(bool is_disable) { * If we free into a disabled SEC, it should forward to the fallback. * Otherwise, the SEC should accept the allocation. 
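The comment above states the routing rule exercised by the test: a free into a disabled (or over-sized) shard goes straight to the fallback allocator, while an enabled shard caches it and defers any real work. A condensed, self-contained sketch of that decision, with hypothetical names and a counter standing in for the real cache:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { size_t size; } extent_t;

typedef struct {
    bool   enabled;
    size_t max_cached_size;
    size_t ncached;                 /* toy "cache": just a counter */
} cache_shard_t;

/* Toy fallback: freeing into it defers purging of the pages. */
static void
fallback_dalloc(extent_t *e, bool *deferred_work_generated) {
    (void)e;
    *deferred_work_generated = true;
}

static void
cache_dalloc(cache_shard_t *shard, extent_t *e, bool *deferred_work_generated) {
    if (!shard->enabled || e->size > shard->max_cached_size) {
        /* Disabled or oversized: forward straight to the fallback. */
        fallback_dalloc(e, deferred_work_generated);
        return;
    }
    /* Accepted into the cache; nothing is deferred yet. */
    *deferred_work_generated = false;
    shard->ncached++;
}

int
main(void) {
    cache_shard_t shard = {true, 4096, 0};
    extent_t small = {4096}, big = {8192};
    bool deferred;
    cache_dalloc(&shard, &small, &deferred);
    printf("small: cached=%zu deferred=%d\n", shard.ncached, deferred);
    cache_dalloc(&shard, &big, &deferred);
    printf("big:   cached=%zu deferred=%d\n", shard.ncached, deferred);
    return 0;
}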
*/ - pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); + pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1], + &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -356,6 +372,8 @@ TEST_BEGIN(test_max_alloc_respected) { size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, /* max_bytes */ 1000 * PAGE); @@ -365,13 +383,13 @@ TEST_BEGIN(test_max_alloc_respected) { expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false); + PAGE, /* zero */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); - pai_dalloc(tsdn, &sec.pai, edata); + pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); } } TEST_END @@ -387,27 +405,31 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_false(err, "Unexpected expand failure"); expect_zu_eq(1, ta.expand_count, ""); ta.expand_return_value = true; err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_true(err, "Unexpected expand success"); expect_zu_eq(2, ta.expand_count, ""); - err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE); + err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE, + &deferred_work_generated); expect_false(err, "Unexpected shrink failure"); expect_zu_eq(1, ta.shrink_count, ""); ta.shrink_return_value = true; - err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE); + err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE, + &deferred_work_generated); expect_true(err, "Unexpected shrink success"); expect_zu_eq(2, ta.shrink_count, ""); } @@ -426,9 +448,10 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); + bool deferred_work_generated; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); - pai_dalloc(tsdn, &sec.pai, edata); + /* zero */ false, &deferred_work_generated); + pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); /* Both operations should have gone directly to the fallback. */ expect_zu_eq(1, ta.alloc_count, ""); @@ -461,25 +484,28 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } /* Increase and decrease, without flushing. 
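test_stats_simple then verifies that the cached-page statistic moves one page at a time as single-page extents are returned to and taken back from the cache. The counter being checked amounts to the following toy model (page size and names hypothetical):

#include <stddef.h>
#include <stdio.h>

#define PAGE ((size_t)4096)            /* hypothetical page size */

typedef struct {
    size_t cached_pages;               /* what the "stats pages" check reads */
} cache_stats_t;

static void
on_dalloc_into_cache(cache_stats_t *st, size_t extent_size) {
    st->cached_pages += extent_size / PAGE;
}

static void
on_alloc_from_cache(cache_stats_t *st, size_t extent_size) {
    st->cached_pages -= extent_size / PAGE;
}

int
main(void) {
    cache_stats_t st = {0};
    on_dalloc_into_cache(&st, PAGE);   /* stats go up by one page */
    on_dalloc_into_cache(&st, PAGE);
    on_alloc_from_cache(&st, PAGE);    /* and back down on reuse */
    printf("cached pages: %zu\n", st.cached_pages);   /* prints 1 */
    return 0;
}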
*/ for (size_t i = 0; i < NITERS; i++) { for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - pai_dalloc(tsdn, &sec.pai, allocs[j]); + pai_dalloc(tsdn, &sec.pai, allocs[j], + &deferred_work_generated); expect_stats_pages(tsdn, &sec, j + 1); } for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } } @@ -505,25 +531,30 @@ TEST_BEGIN(test_stats_auto_flush) { edata_t *extra_alloc1; edata_t *allocs[2 * FLUSH_PAGES]; - extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); - extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + bool deferred_work_generated; + + extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); + extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } - pai_dalloc(tsdn, &sec.pai, extra_alloc0); + pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated); /* Flush the remaining pages; stats should still work. */ for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i], + &deferred_work_generated); } - pai_dalloc(tsdn, &sec.pai, extra_alloc1); + pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count - ta.dalloc_batch_count); @@ -545,16 +576,17 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); + bool deferred_work_generated; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } /* Dalloc the first half of the allocations. */ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); expect_stats_pages(tsdn, &sec, i + 1); } @@ -563,7 +595,8 @@ TEST_BEGIN(test_stats_manual_flush) { /* Flush the remaining pages. */ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]); + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i], + &deferred_work_generated); expect_stats_pages(tsdn, &sec, i + 1); } sec_disable(tsdn, &sec); -- cgit v0.12 From 6e848a005e23d5eeb7f0b32424730d53f1d4edf3 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 10 Sep 2021 17:32:23 -0700 Subject: Remove opt_background_thread_hpa_interval_max_ms Now that HPA can communicate the time until its deferred work should be done, this option is not used anymore. 
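With the fixed wakeup interval removed, the background thread's sleep is derived by asking each page allocator how long until its deferred work is due and keeping the earliest answer, with an early out once the "wake as soon as possible" floor is reached. A standalone sketch of that min-combining; the sentinel values and names are hypothetical, not the jemalloc constants:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DEFERRED_MIN ((uint64_t)0)           /* wake up as soon as possible */
#define DEFERRED_MAX UINT64_MAX              /* nothing is pending */

/* Earliest deadline across several sources, with an early out at the floor. */
uint64_t
combined_time_until_deferred_work(const uint64_t *estimates_ns, size_t n) {
    uint64_t earliest = DEFERRED_MAX;
    for (size_t i = 0; i < n; i++) {
        if (estimates_ns[i] < earliest) {
            earliest = estimates_ns[i];
        }
        if (earliest == DEFERRED_MIN) {
            break;                           /* cannot get any earlier */
        }
    }
    return earliest;
}

int
main(void) {
    uint64_t estimates[] = {50000000ull /* page cache */, 7000000ull /* HPA */};
    printf("sleep for %llu ns\n",
        (unsigned long long)combined_time_until_deferred_work(estimates, 2));
    return 0;
}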
--- .../jemalloc/internal/background_thread_externs.h | 3 -- src/background_thread.c | 11 ++----- src/ctl.c | 5 --- src/jemalloc.c | 9 ------ src/pa.c | 36 +--------------------- src/pac.c | 31 ++++++++++++++++++- src/stats.c | 1 - test/unit/hpa_background_thread.c | 2 +- 8 files changed, 34 insertions(+), 64 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index a2d79ad..6ae3c8d 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; -extern ssize_t opt_background_thread_hpa_interval_max_ms; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; @@ -16,8 +15,6 @@ bool background_threads_disable(tsd_t *tsd); bool background_thread_is_started(background_thread_info_t* info); void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep); -void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); diff --git a/src/background_thread.c b/src/background_thread.c index 69ef983..ac171c3 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -13,13 +13,6 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; -/* - * This is disabled (and set to -1) if the HPA is. If the HPA is enabled, - * malloc_conf initialization sets it to - * BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED. - */ -ssize_t opt_background_thread_hpa_interval_max_ms = - BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; @@ -60,7 +53,7 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED -bool background_thread_running(background_thread_info_t *info) NOT_REACHED +bool background_thread_is_started(background_thread_info_t *info) NOT_REACHED void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED @@ -593,7 +586,7 @@ background_thread_wakeup_early(background_thread_info_t *info, * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. 
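The hunk right after this comment tightens the guard in background_thread_wakeup_early(): a NULL remaining sleep means the thread is in an indefinite sleep and must always be signalled, while a short remaining sleep makes the signal unnecessary. In isolation the guard looks like the sketch below; the threshold and types are hypothetical.

#include <pthread.h>
#include <stdint.h>

#define MIN_WAKEUP_INTERVAL_NS 50000000ull   /* hypothetical 50ms floor */

typedef struct {
    pthread_mutex_t mtx;
    pthread_cond_t  cond;
} bg_info_t;

/*
 * remaining_sleep_ns == NULL: the thread sleeps indefinitely, so it must be
 * signalled for any deferred work. Otherwise, skip the signal when the
 * thread is about to wake up on its own anyway.
 */
void
bg_wakeup_early(bg_info_t *info, const uint64_t *remaining_sleep_ns) {
    if (remaining_sleep_ns != NULL &&
        *remaining_sleep_ns < MIN_WAKEUP_INTERVAL_NS) {
        return;
    }
    pthread_cond_signal(&info->cond);
}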
*/ - if (remaining_sleep && nstime_ns(remaining_sleep) < + if (remaining_sleep != NULL && nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } diff --git a/src/ctl.c b/src/ctl.c index 9647478..42ded60 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,7 +113,6 @@ CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_mutex_max_spin) CTL_PROTO(opt_max_background_threads) -CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -427,8 +426,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, - {NAME("background_thread_hpa_interval_max_ms"), - CTL(opt_background_thread_hpa_interval_max_ms)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -2148,8 +2145,6 @@ CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) -CTL_RO_NL_GEN(opt_background_thread_hpa_interval_max_ms, - opt_background_thread_hpa_interval_max_ms, ssize_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 66e3685..18b5452 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1416,10 +1416,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SSIZE_T( - opt_background_thread_hpa_interval_max_ms, - "background_thread_hpa_interval_max_ms", -1, - SSIZE_MAX) CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); @@ -1658,11 +1654,6 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); - if (opt_hpa && opt_background_thread_hpa_interval_max_ms - == BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED) { - opt_background_thread_hpa_interval_max_ms = - BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED; - } } #undef MALLOC_CONF_NSOURCES diff --git a/src/pa.c b/src/pa.c index a29e10b..249de24 100644 --- a/src/pa.c +++ b/src/pa.c @@ -245,19 +245,6 @@ pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { } } -static inline uint64_t -pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_DEFERRED_MIN; - } - uint64_t result = decay_ns_until_purge(decay, npages, - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); - - malloc_mutex_unlock(tsdn, &decay->mtx); - return result; -} - /* * Get time until next deferred work ought to happen. 
If there are multiple * things that have been deferred, this function calculates the time until @@ -265,32 +252,11 @@ pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { */ uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - uint64_t time; - time = pa_shard_ns_until_purge(tsdn, - &shard->pac.decay_dirty, - ecache_npages_get(&shard->pac.ecache_dirty)); + uint64_t time = pai_time_until_deferred_work(tsdn, &shard->pac.pai); if (time == BACKGROUND_THREAD_DEFERRED_MIN) { return time; } - uint64_t muzzy = pa_shard_ns_until_purge(tsdn, - &shard->pac.decay_muzzy, - ecache_npages_get(&shard->pac.ecache_muzzy)); - if (muzzy < time) { - time = muzzy; - if (time == BACKGROUND_THREAD_DEFERRED_MIN) { - return time; - } - } - - uint64_t pac = pai_time_until_deferred_work(tsdn, &shard->pac.pai); - if (pac < time) { - time = pac; - if (time == BACKGROUND_THREAD_DEFERRED_MIN) { - return time; - } - } - if (pa_shard_uses_hpa(shard)) { uint64_t hpa = pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); diff --git a/src/pac.c b/src/pac.c index 2221c8d..03e3197 100644 --- a/src/pac.c +++ b/src/pac.c @@ -208,9 +208,38 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, *deferred_work_generated = true; } +static inline uint64_t +pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_DEFERRED_MIN; + } + uint64_t result = decay_ns_until_purge(decay, npages, + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + + malloc_mutex_unlock(tsdn, &decay->mtx); + return result; +} + static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { - return BACKGROUND_THREAD_DEFERRED_MAX; + uint64_t time; + pac_t *pac = (pac_t *)self; + + time = pac_ns_until_purge(tsdn, + &pac->decay_dirty, + ecache_npages_get(&pac->ecache_dirty)); + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + + uint64_t muzzy = pac_ns_until_purge(tsdn, + &pac->decay_muzzy, + ecache_npages_get(&pac->ecache_muzzy)); + if (muzzy < time) { + time = muzzy; + } + return time; } bool diff --git a/src/stats.c b/src/stats.c index 25ee235..7af5782 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1499,7 +1499,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") - OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") OPT_WRITE_SIZE_T("lg_extent_max_active_fit") diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index c468683..77d0555 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -77,7 +77,7 @@ wait_until_thread_is_enabled(unsigned arena_id) { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); sleeping = background_thread_indefinite_sleep(info); - assert_d_lt(iterations, (int)1e6, + assert_d_lt(iterations, UINT64_C(1000000), "Waiting for a thread to start for too long"); } while (!sleeping); } -- cgit v0.12 From 523cfa55c5b350decb5efc11083c4bc366cd98c4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Sep 2021 10:58:04 -0700 Subject: Guard prof related mallctl with opt_prof. The prof initialization is done only when opt_prof is true. 
This change makes sure the prof_* mallctls only have limited read access (i.e. no access to prof internals) when opt_prof is false. In addition, initialize the global prof mutexes even if opt_prof is false. This makes sure the mutex stats are set properly. --- src/ctl.c | 47 ++++++++++++++++++-------- src/prof.c | 97 +++++++++++++++++++++++++---------------------------- test/unit/mallctl.c | 2 +- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 42ded60..8717c96 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2337,7 +2337,7 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t newlen) { int ret; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -2374,8 +2374,12 @@ thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, return ENOENT; } - oldval = prof_thread_active_get(tsd); + oldval = opt_prof ? prof_thread_active_get(tsd) : false; if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; @@ -3128,6 +3132,10 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, } if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; @@ -3135,7 +3143,8 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, oldval = prof_thread_active_init_set(tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); + oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) : + false; } READ(oldval, bool); @@ -3161,13 +3170,19 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } bool val = *(bool *)newp; - if (!opt_prof && val) { - ret = ENOENT; - goto label_return; + if (!opt_prof) { + if (val) { + ret = ENOENT; + goto label_return; + } else { + /* No change needed (already off). */ + oldval = false; + } + } else { + oldval = prof_active_set(tsd_tsdn(tsd), val); } - oldval = prof_active_set(tsd_tsdn(tsd), val); } else { - oldval = prof_active_get(tsd_tsdn(tsd)); + oldval = opt_prof ? prof_active_get(tsd_tsdn(tsd)) : false; } READ(oldval, bool); @@ -3182,7 +3197,7 @@ prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; const char *filename = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3210,13 +3225,17 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = prof_gdump_get(tsd_tsdn(tsd)); + oldval = opt_prof ? 
prof_gdump_get(tsd_tsdn(tsd)) : false; } READ(oldval, bool); @@ -3231,7 +3250,7 @@ prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; const char *prefix = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3251,7 +3270,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; size_t lg_sample = lg_prof_sample; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3278,7 +3297,7 @@ prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const char *filename = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3298,7 +3317,7 @@ label_return: static int prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } diff --git a/src/prof.c b/src/prof.c index 0f1f7a7..67a7f71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -554,72 +554,65 @@ bool prof_boot2(tsd_t *tsd, base_t *base) { cassert(config_prof); - if (opt_prof) { - unsigned i; + /* + * Initialize the global mutexes unconditionally to maintain correct + * stats when opt_prof is false. + */ + if (malloc_mutex_init(&prof_active_mtx, "prof_active", + WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", + WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_thread_active_init_mtx, + "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, + malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", + WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", + WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", + WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_dump_filename_mtx, + "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, + malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", + WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { + return true; + } + if (opt_prof) { lg_prof_sample = opt_lg_prof_sample; prof_unbias_map_init(); - prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { - return true; - } - prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { - return true; - } - prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx, - "prof_thread_active_init", - WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, - malloc_mutex_rank_exclusive)) { - return true; - } if (prof_data_init(tsd)) { return true; } - if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { - return true; - } - - if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { - return true; - } - next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx, 
"prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, - malloc_mutex_rank_exclusive)) { - return true; - } - - if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", - WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { - return true; - } - if (prof_idump_accum_init()) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, - "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, - malloc_mutex_rank_exclusive)) { - return true; - } - if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { - return true; - } - if (opt_prof_final && opt_prof_prefix[0] != '\0' && atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); @@ -643,7 +636,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (gctx_locks == NULL) { return true; } - for (i = 0; i < PROF_NCTX_LOCKS; i++) { + for (unsigned i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", WITNESS_RANK_PROF_GCTX, malloc_mutex_rank_exclusive)) { @@ -656,7 +649,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (tdata_locks == NULL) { return true; } - for (i = 0; i < PROF_NTDATA_LOCKS; i++) { + for (unsigned i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", WITNESS_RANK_PROF_TDATA, malloc_mutex_rank_exclusive)) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e9e0feb..5cba083 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -914,7 +914,7 @@ TEST_BEGIN(test_prof_active) { old = true; expect_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, "Setting prof_active to true should fail when opt_prof is off"); - expect_true(old, "old valud should not be touched when mallctl fails"); + expect_true(old, "old value should not be touched when mallctl fails"); active = false; expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, "Setting prof_active to false should succeed when opt_prof is off"); -- cgit v0.12 From f7d46b81197b9879e1f572f9a4d3bfe3b8f850b9 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 30 Aug 2021 14:05:56 -0700 Subject: Allow setting custom backtrace hook Existing backtrace implementations skip native stack frames from runtimes like Python. The hook allows to augment the backtraces to attribute allocations to native functions in heap profiles. 
--- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 4 ++- include/jemalloc/internal/prof_hook.h | 16 +++++++++ include/jemalloc/internal/prof_structs.h | 3 +- include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 36 ++++++++++++++++++- src/prof.c | 16 +++++++++ src/prof_sys.c | 44 ++++++++++++----------- test/analyze/prof_bias.c | 14 ++++---- test/unit/prof_hook.c | 61 ++++++++++++++++++++++++++++++++ test/unit/prof_hook.sh | 6 ++++ 11 files changed, 172 insertions(+), 30 deletions(-) create mode 100644 include/jemalloc/internal/prof_hook.h create mode 100644 test/unit/prof_hook.c create mode 100644 test/unit/prof_hook.sh diff --git a/Makefile.in b/Makefile.in index 51276ce..a6f61ce 100644 --- a/Makefile.in +++ b/Makefile.in @@ -247,6 +247,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ + $(srcroot)test/unit/prof_hook.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ $(srcroot)test/unit/prof_mdump.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 671ac9b..75d1d7a 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_EXTERNS_H #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_hook.h" extern bool opt_prof; extern bool opt_prof_active; @@ -52,7 +53,8 @@ extern bool prof_booted; * otherwise difficult to guarantee that two allocations are reported as coming * from the exact same stack trace in the presence of an optimizing compiler. */ -extern void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt); +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +prof_backtrace_hook_t prof_backtrace_hook_get(); /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h new file mode 100644 index 0000000..277cd99 --- /dev/null +++ b/include/jemalloc/internal/prof_hook.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_PROF_HOOK_H +#define JEMALLOC_INTERNAL_PROF_HOOK_H + +/* + * The hooks types of which are declared in this file are experimental and + * undocumented, thus the typedefs are located in an 'internal' header. + */ + +/* + * A hook to mock out backtrace functionality. This can be handy, since it's + * otherwise difficult to guarantee that two allocations are reported as coming + * from the exact same stack trace in the presence of an optimizing compiler. + */ +typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); + +#endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index c2a111a..dd22115 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -16,7 +16,8 @@ struct prof_bt_s { #ifdef JEMALLOC_PROF_LIBGCC /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. 
*/ typedef struct { - prof_bt_t *bt; + void **vec; + unsigned *len; unsigned max; } prof_unwind_data_t; #endif diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 6e4e811..3d25a42 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,6 +6,7 @@ extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_hooks_init(); void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); diff --git a/src/ctl.c b/src/ctl.c index 8717c96..6bf1c94 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -305,6 +305,7 @@ CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -833,7 +834,8 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)} + {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)} }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3328,6 +3330,38 @@ prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return 0; } +static int +experimental_hooks_prof_backtrace_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_backtrace_hook_t old_hook = + prof_backtrace_hook_get(); + READ(old_hook, prof_backtrace_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_backtrace_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_backtrace_hook_t); + if (new_hook == NULL) { + ret = EINVAL; + goto label_return; + } + prof_backtrace_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/prof.c b/src/prof.c index 67a7f71..d0cae0e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" /* @@ -69,6 +70,9 @@ static malloc_mutex_t next_thr_uid_mtx; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; +/* Logically a prof_backtrace_hook_t. 
*/ +atomic_p_t prof_backtrace_hook; + /******************************************************************************/ void @@ -519,6 +523,17 @@ prof_gdump_set(tsdn_t *tsdn, bool gdump) { } void +prof_backtrace_hook_set(prof_backtrace_hook_t hook) { + atomic_store_p(&prof_backtrace_hook, hook, ATOMIC_RELEASE); +} + +prof_backtrace_hook_t +prof_backtrace_hook_get() { + return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, + ATOMIC_ACQUIRE); +} + +void prof_boot0(void) { cassert(config_prof); @@ -657,6 +672,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { } } + prof_hooks_init(); prof_unwind_init(); } prof_booted = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index 6a5b2b1..1485e8b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -49,18 +49,18 @@ bt_init(prof_bt_t *bt, void **vec) { #ifdef JEMALLOC_PROF_LIBUNWIND static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { int nframes; cassert(config_prof); - assert(bt->len == 0); - assert(bt->vec != NULL); + assert(*len == 0); + assert(vec != NULL); - nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + nframes = unw_backtrace(vec, PROF_BT_MAX); if (nframes <= 0) { return; } - bt->len = nframes; + *len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code @@ -81,9 +81,9 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { if (ip == NULL) { return _URC_END_OF_STACK; } - data->bt->vec[data->bt->len] = ip; - data->bt->len++; - if (data->bt->len == data->max) { + data->vec[*data->len] = ip; + (*data->len)++; + if (*data->len == data->max) { return _URC_END_OF_STACK; } @@ -91,8 +91,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { } static void -prof_backtrace_impl(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, PROF_BT_MAX}; +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { + prof_unwind_data_t data = {vec, len, max_len}; cassert(config_prof); @@ -100,9 +100,9 @@ prof_backtrace_impl(prof_bt_t *bt) { } #elif (defined(JEMALLOC_PROF_GCC)) static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { #define BT_FRAME(i) \ - if ((i) < PROF_BT_MAX) { \ + if ((i) < max_len) { \ void *p; \ if (__builtin_frame_address(i) == 0) { \ return; \ @@ -111,8 +111,8 @@ prof_backtrace_impl(prof_bt_t *bt) { if (p == NULL) { \ return; \ } \ - bt->vec[(i)] = p; \ - bt->len = (i) + 1; \ + vec[(i)] = p; \ + *len = (i) + 1; \ } else { \ return; \ } @@ -263,24 +263,28 @@ prof_backtrace_impl(prof_bt_t *bt) { } #else static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); not_reached(); } #endif - -void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt) = &prof_backtrace_impl; - void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); pre_reentrancy(tsd, NULL); - prof_backtrace_hook(bt); + prof_backtrace_hook_t prof_backtrace_hook = prof_backtrace_hook_get(); + prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); post_reentrancy(tsd); } -void prof_unwind_init() { +void +prof_hooks_init() { + prof_backtrace_hook_set(&prof_backtrace_impl); +} + +void +prof_unwind_init() { #ifdef JEMALLOC_PROF_LIBGCC /* * Cause the backtracing machinery to allocate its internal diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index 0aae766..4b960a6 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -24,12 +24,12 @@ */ static void -mock_backtrace(prof_bt_t 
*bt) { - bt->len = 4; - bt->vec[0] = (void *)0x111; - bt->vec[1] = (void *)0x222; - bt->vec[2] = (void *)0x333; - bt->vec[3] = (void *)0x444; +mock_backtrace(void **vec, unsigned *len, unsigned max_len) { + *len = 4; + vec[0] = (void *)0x111; + vec[1] = (void *)0x222; + vec[2] = (void *)0x333; + vec[3] = (void *)0x444; } static void @@ -50,7 +50,7 @@ main(void) { sizeof(lg_prof_sample)); assert(err == 0); - prof_backtrace_hook = &mock_backtrace; + prof_backtrace_hook_set(mock_backtrace); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c new file mode 100644 index 0000000..32d0e9e --- /dev/null +++ b/test/unit/prof_hook.c @@ -0,0 +1,61 @@ +#include "test/jemalloc_test.h" + +bool mock_bt_hook_called = false; + +void +mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { + *len = max_len; + for (unsigned i = 0; i < max_len; ++i) { + vec[i] = (void *)((uintptr_t)i); + } + mock_bt_hook_called = true; +} + +TEST_BEGIN(test_prof_backtrace_hook) { + + test_skip_if(!config_prof); + + mock_bt_hook_called = false; + + void *p0 = mallocx(1, 0); + assert_ptr_not_null(p0, "Failed to allocate"); + + expect_false(mock_bt_hook_called, "Called mock hook before it's set"); + + prof_backtrace_hook_t null_hook = NULL; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + NULL, 0, (void *)&null_hook, sizeof(null_hook)), + EINVAL, "Incorrectly allowed NULL backtrace hook"); + + prof_backtrace_hook_t default_hook; + size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + prof_backtrace_hook_t hook = &mock_bt_hook; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + void *p1 = mallocx(1, 0); + assert_ptr_not_null(p1, "Failed to allocate"); + + expect_true(mock_bt_hook_called, "Didn't call mock hook"); + + prof_backtrace_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); + + dallocx(p1, 0); + dallocx(p0, 0); +} +TEST_END + +int +main(void) { + return test( + test_prof_backtrace_hook); +} diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh new file mode 100644 index 0000000..d14cb8c --- /dev/null +++ b/test/unit/prof_hook.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + -- cgit v0.12 From a9031a0970df9c999873617423f789bd46bfe619 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 1 Sep 2021 13:00:01 -0700 Subject: Allow setting a dump hook If users want to be notified when a heap dump occurs, they can set this hook. 
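A matching sketch, under the same caveats, of registering a dump notification hook via the experimental.hooks.prof_dump mallctl introduced below and then triggering it with prof.dump; the file name and the fprintf body are purely illustrative:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

/* Mirrors prof_dump_hook_t: invoked with the name of the dumped file. */
static void
dump_notify_hook(const char *filename) {
	/* Illustrative only: e.g. index or upload the profile here. */
	fprintf(stderr, "heap profile written to %s\n", filename);
}

static int
install_and_exercise_dump_hook(void) {
	void (*hook)(const char *) = dump_notify_hook;
	/*
	 * Requires opt_prof; unlike the backtrace hook, writing NULL is
	 * accepted and simply clears the hook.
	 */
	int err = mallctl("experimental.hooks.prof_dump", NULL, NULL,
	    (void *)&hook, sizeof(hook));
	if (err != 0) {
		return err;
	}
	/* The hook fires with the same filename passed here. */
	const char *filename = "/tmp/jeprof.out";
	return mallctl("prof.dump", NULL, NULL, (void *)&filename,
	    sizeof(filename));
}
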
--- include/jemalloc/internal/prof_externs.h | 8 +-- include/jemalloc/internal/prof_hook.h | 5 ++ src/ctl.c | 32 ++++++++- src/prof.c | 16 ++++- src/prof_sys.c | 14 +++- test/unit/prof_hook.c | 114 ++++++++++++++++++++++++++++++- 6 files changed, 178 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 75d1d7a..75dd90b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,14 +48,12 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* - * A hook to mock out backtrace functionality. This can be handy, since it's - * otherwise difficult to guarantee that two allocations are reported as coming - * from the exact same stack trace in the presence of an optimizing compiler. - */ void prof_backtrace_hook_set(prof_backtrace_hook_t hook); prof_backtrace_hook_t prof_backtrace_hook_get(); +void prof_dump_hook_set(prof_dump_hook_t hook); +prof_dump_hook_t prof_dump_hook_get(); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 277cd99..150d19d 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -13,4 +13,9 @@ */ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); +/* + * A callback hook that notifies about recently dumped heap profile. + */ +typedef void (*prof_dump_hook_t)(const char *filename); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/src/ctl.c b/src/ctl.c index 6bf1c94..3aaa5a7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -306,6 +306,7 @@ CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) +CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -835,7 +836,8 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, {NAME("remove"), CTL(experimental_hooks_remove)}, - {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)} + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, + {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3362,6 +3364,34 @@ label_return: return ret; } +static int +experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_dump_hook_t old_hook = + prof_dump_hook_get(); + READ(old_hook, prof_dump_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_dump_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_dump_hook_t); + prof_dump_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/prof.c b/src/prof.c index d0cae0e..625bcd7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -73,6 +73,9 @@ bool 
prof_booted = false; /* Logically a prof_backtrace_hook_t. */ atomic_p_t prof_backtrace_hook; +/* Logically a prof_dump_hook_t. */ +atomic_p_t prof_dump_hook; + /******************************************************************************/ void @@ -534,6 +537,17 @@ prof_backtrace_hook_get() { } void +prof_dump_hook_set(prof_dump_hook_t hook) { + atomic_store_p(&prof_dump_hook, hook, ATOMIC_RELEASE); +} + +prof_dump_hook_t +prof_dump_hook_get() { + return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, + ATOMIC_ACQUIRE); +} + +void prof_boot0(void) { cassert(config_prof); @@ -672,8 +686,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } } - prof_hooks_init(); prof_unwind_init(); + prof_hooks_init(); } prof_booted = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index 1485e8b..fd41e86 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -55,6 +55,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(*len == 0); assert(vec != NULL); + assert(max_len == PROF_BT_MAX); nframes = unw_backtrace(vec, PROF_BT_MAX); if (nframes <= 0) { @@ -95,6 +96,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { prof_unwind_data_t data = {vec, len, max_len}; cassert(config_prof); + assert(vec != NULL); + assert(max_len == PROF_BT_MAX); _Unwind_Backtrace(prof_unwind_callback, &data); } @@ -118,6 +121,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { } cassert(config_prof); + assert(vec != NULL); + assert(max_len == PROF_BT_MAX); BT_FRAME(0) BT_FRAME(1) @@ -272,8 +277,10 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); - pre_reentrancy(tsd, NULL); prof_backtrace_hook_t prof_backtrace_hook = prof_backtrace_hook_get(); + assert(prof_backtrace_hook != NULL); + + pre_reentrancy(tsd, NULL); prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); post_reentrancy(tsd); } @@ -281,6 +288,7 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { void prof_hooks_init() { prof_backtrace_hook_set(&prof_backtrace_impl); + prof_dump_hook_set(NULL); } void @@ -506,6 +514,10 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); prof_dump_close(&arg); + prof_dump_hook_t dump_hook = prof_dump_hook_get(); + if (dump_hook != NULL) { + dump_hook(filename); + } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); post_reentrancy(tsd); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index 32d0e9e..6480d93 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -1,6 +1,11 @@ #include "test/jemalloc_test.h" +const char *dump_filename = "/dev/null"; + +prof_backtrace_hook_t default_hook; + bool mock_bt_hook_called = false; +bool mock_dump_hook_called = false; void mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { @@ -11,7 +16,38 @@ mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { mock_bt_hook_called = true; } -TEST_BEGIN(test_prof_backtrace_hook) { +void +mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { + default_hook(vec, len, max_len); + expect_u_gt(*len, 0, "Default backtrace hook returned empty backtrace"); + expect_u_lt(*len, max_len, + "Default backtrace hook returned too large backtrace"); + + /* Add a separator between default frames and augmented */ + vec[*len] = (void *)0x030303030; + (*len)++; + + /* Add more stack frames */ + for (unsigned i = 0; i < 3; ++i) { + if (*len == max_len) { + break; + } + vec[*len] = (void *)((uintptr_t)i); + 
(*len)++; + } + + + mock_bt_hook_called = true; +} + +void +mock_dump_hook(const char *filename) { + mock_dump_hook_called = true; + expect_str_eq(filename, dump_filename, + "Incorrect file name passed to the dump hook"); +} + +TEST_BEGIN(test_prof_backtrace_hook_replace) { test_skip_if(!config_prof); @@ -27,7 +63,6 @@ TEST_BEGIN(test_prof_backtrace_hook) { NULL, 0, (void *)&null_hook, sizeof(null_hook)), EINVAL, "Incorrectly allowed NULL backtrace hook"); - prof_backtrace_hook_t default_hook; size_t default_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", @@ -54,8 +89,81 @@ TEST_BEGIN(test_prof_backtrace_hook) { } TEST_END +TEST_BEGIN(test_prof_backtrace_hook_augment) { + + test_skip_if(!config_prof); + + mock_bt_hook_called = false; + + void *p0 = mallocx(1, 0); + assert_ptr_not_null(p0, "Failed to allocate"); + + expect_false(mock_bt_hook_called, "Called mock hook before it's set"); + + size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + void *p1 = mallocx(1, 0); + assert_ptr_not_null(p1, "Failed to allocate"); + + expect_true(mock_bt_hook_called, "Didn't call mock hook"); + + prof_backtrace_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); + + dallocx(p1, 0); + dallocx(p0, 0); +} +TEST_END + +TEST_BEGIN(test_prof_dump_hook) { + + test_skip_if(!config_prof); + + mock_dump_hook_called = false; + + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, + sizeof(dump_filename)), 0, "Failed to dump heap profile"); + + expect_false(mock_dump_hook_called, "Called dump hook before it's set"); + + size_t default_hook_sz = sizeof(prof_dump_hook_t); + prof_dump_hook_t hook = &mock_dump_hook; + expect_d_eq(mallctl("experimental.hooks.prof_dump", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, + sizeof(dump_filename)), 0, "Failed to dump heap profile"); + + expect_true(mock_dump_hook_called, "Didn't call mock hook"); + + prof_dump_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_dump_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_dump", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); +} +TEST_END + int main(void) { return test( - test_prof_backtrace_hook); + test_prof_backtrace_hook_replace, + test_prof_backtrace_hook_augment, + test_prof_dump_hook); } -- cgit v0.12 From 7bb05e04be693b26536dc2335b4d230dacc5d7d2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 23 Aug 2021 14:03:35 +0200 Subject: add experimental.arenas_create_ext mallctl This mallctl accepts an arena_config_t structure which can be used to customize the behavior of the arena. 
Right now it contains extent_hooks and a new option, metadata_use_hooks, which controls whether the extent hooks are also used for metadata allocation. The medata_use_hooks option has two main use cases: 1. In heterogeneous memory systems, to avoid metadata being placed on potentially slower memory. 2. Avoiding virtual memory from being leaked as a result of metadata allocation failure originating in an extent hook. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_types.h | 14 ++++++ include/jemalloc/internal/base.h | 7 ++- include/jemalloc/internal/base_structs.h | 5 ++ .../jemalloc/internal/jemalloc_internal_externs.h | 2 +- .../internal/jemalloc_internal_inlines_a.h | 2 +- src/arena.c | 11 +++-- src/base.c | 21 +++++++-- src/ctl.c | 42 ++++++++++++++--- src/jemalloc.c | 14 +++--- test/integration/extent.c | 53 +++++++++++++++++++++- test/unit/base.c | 7 +-- test/unit/edata_cache.c | 2 +- test/unit/hpa.c | 2 +- test/unit/pa.c | 3 +- test/unit/rtree.c | 15 ++++-- test/unit/sec.c | 4 +- 17 files changed, 165 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index b9231c5..e6fceaa 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -97,7 +97,7 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index e0f8218..f763a8c 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -41,4 +41,18 @@ typedef enum { */ #define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) +struct arena_config_s { + /* extent hooks to be used for the arena */ + struct extent_hooks_s *extent_hooks; + + /* + * Use extent hooks for metadata (base) allocations when true. + */ + bool metadata_use_hooks; +}; + +typedef struct arena_config_s arena_config_t; + +extern const arena_config_t arena_config_default; + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 628e393..67e1940 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -46,6 +46,11 @@ struct base_s { */ ehooks_t ehooks; + /* + * Use user hooks for metadata when true. + */ + bool metadata_use_hooks; + /* Protects base_alloc() and base_stats_get() operations. 
*/ malloc_mutex_t mtx; @@ -87,7 +92,7 @@ metadata_thp_enabled(void) { base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, - const extent_hooks_t *extent_hooks); + const extent_hooks_t *extent_hooks, bool metadata_use_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index ff1fdfb..914c5b5 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -25,6 +25,11 @@ struct base_s { */ ehooks_t ehooks; + /* + * Use user hooks for metadata when true. + */ + bool metadata_use_hooks; + /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index af6dc0a..e8bfb03 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -57,7 +57,7 @@ void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24e42d3..1bca34c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -66,7 +66,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { if (unlikely(ret == NULL)) { if (init_if_missing) { ret = arena_init(tsdn, ind, - (extent_hooks_t *)&ehooks_default_extent_hooks); + &arena_config_default); } } return ret; diff --git a/src/arena.c b/src/arena.c index c720bcb..c2842c6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -48,6 +48,11 @@ static unsigned nbins_total; static unsigned huge_arena_ind; +const arena_config_t arena_config_default = { + /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, + /* .metadata_use_hooks = */ true, +}; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -1516,7 +1521,6 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, return ret; } - dss_prec_t arena_dss_prec_get(arena_t *arena) { return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); @@ -1583,7 +1587,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) { } arena_t * -arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; unsigned i; @@ -1591,7 +1595,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (ind == 0) { base = b0get(); } else { - base = base_new(tsdn, ind, extent_hooks); + base = base_new(tsdn, ind, config->extent_hooks, + config->metadata_use_hooks); if (base == NULL) { return NULL; } diff --git a/src/base.c b/src/base.c index 44878ad..cc127ea 100644 --- a/src/base.c +++ b/src/base.c @@ -295,6 +295,12 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, 
ehooks_t *ehooks, unsigned ind, return block; } +static ehooks_t * +base_ehooks_get_for_metadata(base_t *base) { + return base->metadata_use_hooks ? &base->ehooks : + (struct ehooks_s *)&ehooks_default_extent_hooks; +} + /* * Allocate an extent that is at least as large as specified size, with * specified alignment. @@ -303,7 +309,7 @@ static edata_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); - ehooks_t *ehooks = base_ehooks_get(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); /* * Drop mutex during base_block_alloc(), because an extent hook will be * called. @@ -342,7 +348,8 @@ b0get(void) { } base_t * -base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { +base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, + bool metadata_use_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; @@ -352,7 +359,9 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. */ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, (extent_hooks_t *)extent_hooks, ind); + ehooks_init(&fake_ehooks, metadata_use_hooks ? + (extent_hooks_t *)extent_hooks : + (extent_hooks_t *)&ehooks_default_extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -375,6 +384,7 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { base->extent_sn_next = extent_sn_next; base->blocks = block; base->auto_thp_switched = false; + base->metadata_use_hooks = metadata_use_hooks; for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } @@ -397,7 +407,7 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { void base_delete(tsdn_t *tsdn, base_t *base) { - ehooks_t *ehooks = base_ehooks_get(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); base_block_t *next = base->blocks; do { base_block_t *block = next; @@ -512,6 +522,7 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks); + b0 = base_new(tsdn, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks, true); return (b0 == NULL); } diff --git a/src/ctl.c b/src/ctl.c index 3aaa5a7..491a333 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -315,6 +315,7 @@ INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) CTL_PROTO(experimental_prof_recent_alloc_dump) CTL_PROTO(experimental_batch_alloc) +CTL_PROTO(experimental_arenas_create_ext) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -870,6 +871,7 @@ static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, {NAME("thread"), CHILD(named, experimental_thread)} @@ -1242,7 +1244,7 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, } static unsigned -ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) { +ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { unsigned arena_ind; ctl_arena_t *ctl_arena; @@ -1260,7 +1262,7 @@ ctl_arena_init(tsd_t 
*tsd, extent_hooks_t *extent_hooks) { } /* Initialize new arena. */ - if (arena_init(tsd_tsdn(tsd), arena_ind, extent_hooks) == NULL) { + if (arena_init(tsd_tsdn(tsd), arena_ind, config) == NULL) { return UINT_MAX; } @@ -2881,8 +2883,11 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); + arena_config_t config = arena_config_default; + config.extent_hooks = new_extent_hooks; + arena = arena_init(tsd_tsdn(tsd), arena_ind, - new_extent_hooks); + &config); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -3069,15 +3074,14 @@ static int arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - extent_hooks_t *extent_hooks; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); VERIFY_READ(unsigned); - extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; - WRITE(extent_hooks, extent_hooks_t *); - if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { + arena_config_t config = arena_config_default; + WRITE(config.extent_hooks, extent_hooks_t *); + if ((arena_ind = ctl_arena_init(tsd, &config)) == UINT_MAX) { ret = EAGAIN; goto label_return; } @@ -3090,6 +3094,30 @@ label_return: } static int +experimental_arenas_create_ext_ctl(tsd_t *tsd, + const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + + arena_config_t config = arena_config_default; + VERIFY_READ(unsigned); + WRITE(config, arena_config_t); + + if ((arena_ind = ctl_arena_init(tsd, &config)) == UINT_MAX) { + ret = EAGAIN; + goto label_return; + } + READ(arena_ind, unsigned); + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + +static int arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 18b5452..7ffa553 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -384,7 +384,7 @@ narenas_total_get(void) { /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_init_locked(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; assert(ind <= narenas_total_get()); @@ -406,7 +406,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Actually initialize the arena. 
*/ - arena = arena_new(tsdn, ind, extent_hooks); + arena = arena_new(tsdn, ind, config); return arena; } @@ -430,11 +430,11 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { } arena_t * -arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; malloc_mutex_lock(tsdn, &arenas_lock); - arena = arena_init_locked(tsdn, ind, extent_hooks); + arena = arena_init_locked(tsdn, ind, config); malloc_mutex_unlock(tsdn, &arenas_lock); arena_new_create_background_thread(tsdn, ind); @@ -570,8 +570,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), choose[j], - (extent_hooks_t *) - &ehooks_default_extent_hooks); + &arena_config_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); @@ -1779,8 +1778,7 @@ malloc_init_hard_a0_locked() { * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks) == NULL) { + if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) { return true; } a0 = arena_get(TSDN_NULL, 0, false); diff --git a/test/integration/extent.c b/test/integration/extent.c index 831ef63..7a028f1 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,6 +2,8 @@ #include "test/extent_hooks.h" +#include "jemalloc/internal/arena_types.h" + static void test_extent_body(unsigned arena_ind) { void *p; @@ -228,9 +230,58 @@ TEST_BEGIN(test_extent_auto_hook) { } TEST_END +static void +test_arenas_create_ext_base(arena_config_t config, + bool expect_hook_data, bool expect_hook_metadata) +{ + unsigned arena, arena1; + void *ptr; + size_t sz = sizeof(unsigned); + + extent_hooks_prep(); + + called_alloc = false; + expect_d_eq(mallctl("experimental.arenas_create_ext", + (void *)&arena, &sz, &config, sizeof(arena_config_t)), 0, + "Unexpected mallctl() failure"); + expect_b_eq(called_alloc, expect_hook_metadata, + "expected hook metadata alloc mismatch"); + + called_alloc = false; + ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); + expect_b_eq(called_alloc, expect_hook_data, + "expected hook data alloc mismatch"); + + expect_ptr_not_null(ptr, "Unexpected mallocx() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + expect_u_eq(arena, arena1, "Unexpected arena index"); + dallocx(ptr, 0); +} + +TEST_BEGIN(test_arenas_create_ext_with_ehooks_no_metadata) { + arena_config_t config; + config.extent_hooks = &hooks; + config.metadata_use_hooks = false; + + test_arenas_create_ext_base(config, true, false); +} +TEST_END + +TEST_BEGIN(test_arenas_create_ext_with_ehooks_with_metadata) { + arena_config_t config; + config.extent_hooks = &hooks; + config.metadata_use_hooks = true; + + test_arenas_create_ext_base(config, true, true); +} +TEST_END + int main(void) { return test( test_extent_manual_hook, - test_extent_auto_hook); + test_extent_auto_hook, + test_arenas_create_ext_with_ehooks_no_metadata, + test_arenas_create_ext_with_ehooks_with_metadata); } diff --git a/test/unit/base.c b/test/unit/base.c index 5e990b3..07a43df 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -32,7 +32,8 @@ TEST_BEGIN(test_base_hooks_default) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks); + (extent_hooks_t *)&ehooks_default_extent_hooks, + /* 
metadata_use_hooks */ true); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, @@ -74,7 +75,7 @@ TEST_BEGIN(test_base_hooks_null) { memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t)); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, &hooks); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { @@ -120,7 +121,7 @@ TEST_BEGIN(test_base_hooks_not_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); did_alloc = false; - base = base_new(tsdn, 0, &hooks); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new() failure"); expect_true(did_alloc, "Expected alloc"); diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index fe920c9..af1110a 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -5,7 +5,7 @@ static void test_edata_cache_init(edata_cache_t *edata_cache) { base_t *base = base_new(TSDN_NULL, /* ind */ 1, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); bool err = edata_cache_init(edata_cache, base); assert_false(err, ""); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index dc3acc0..86012c7 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,7 @@ static hpa_shard_t * create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data_t *test_data = malloc(sizeof(test_data_t)); diff --git a/test/unit/pa.c b/test/unit/pa.c index 4d3ad5e..01d891d 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -53,7 +53,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { assert_ptr_not_null(test_data, ""); init_test_extent_hooks(&test_data->hooks); - base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks); + base_t *base = base_new(TSDN_NULL, /* ind */ 1, + &test_data->hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data->base = base; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 7b2a4e3..82b617b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -12,7 +12,8 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -52,7 +53,8 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -103,7 +105,8 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, @@ -152,7 +155,8 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + 
base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; @@ -250,7 +254,8 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, TEST_BEGIN(test_rtree_range) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; diff --git a/test/unit/sec.c b/test/unit/sec.c index 82b0c9d..763e608 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -42,7 +42,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, * short-running, and SECs are arena-scoped in reality. */ base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); @@ -442,7 +442,7 @@ TEST_BEGIN(test_nshards_0) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); sec_opts_t opts = SEC_OPTS_DEFAULT; opts.nshards = 0; -- cgit v0.12 From deb8e62a837b6dd303128a544501a7dc9677e47a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Apr 2021 14:22:25 -0700 Subject: Implement guard pages. Adding guarded extents, which are regular extents surrounded by guard pages (mprotected). To reduce syscalls, small guarded extents are cached as a separate eset in ecache, and decay through the dirty / muzzy / retained pipeline as usual. 
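The mechanism itself is page-granular mprotect bracketing, roughly as in the standalone sketch below; this is not jemalloc's internal guard_pages()/unguard_pages() API, and usable_size is assumed to be a multiple of the page size:

#include <assert.h>
#include <stddef.h>
#include <sys/mman.h>
#include <unistd.h>

/*
 * Map usable_size plus two guard pages (cf. PAGE_GUARDS_SIZE), then
 * revoke access to the first and last page so any underflow or overflow
 * into them faults immediately instead of corrupting neighboring data.
 */
static void *
alloc_guarded(size_t usable_size) {
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	char *head = mmap(NULL, usable_size + 2 * page,
	    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (head == MAP_FAILED) {
		return NULL;
	}
	int err = mprotect(head, page, PROT_NONE);	/* head guard */
	err |= mprotect(head + page + usable_size, page,
	    PROT_NONE);					/* tail guard */
	assert(err == 0);
	return head + page;	/* caller sees only the usable region */
}

In the actual change the guarding frequency is controlled by opt_san_guard_large and opt_san_guard_small (0, the default, disables guarding), sampled once every N eligible extents per thread.
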
--- Makefile.in | 2 + configure.ac | 12 ++ include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/ecache.h | 13 +- include/jemalloc/internal/edata.h | 26 ++- include/jemalloc/internal/ehooks.h | 41 +++++ include/jemalloc/internal/extent.h | 4 +- include/jemalloc/internal/guard.h | 76 ++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 + include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pages.h | 2 + include/jemalloc/internal/pai.h | 7 +- include/jemalloc/internal/tsd.h | 4 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 11 +- src/ecache.c | 4 + src/ehooks.c | 10 + src/extent.c | 78 +++++--- src/guard.c | 63 +++++++ src/hpa.c | 7 +- src/jemalloc.c | 9 + src/pa.c | 27 ++- src/pac.c | 86 +++++++-- src/pages.c | 47 ++++- src/pai.c | 2 +- src/sec.c | 9 +- src/tsd.c | 3 + test/include/test/arena_decay.h | 149 +++++++++++++++ test/include/test/guard.h | 6 + test/unit/arena_decay.c | 150 +-------------- test/unit/double_free.c | 25 ++- test/unit/guard.c | 201 +++++++++++++++++++++ test/unit/guard.sh | 3 + test/unit/hpa.c | 16 +- test/unit/hpa_background_thread.c | 4 + test/unit/pa.c | 2 +- test/unit/retained.c | 7 +- test/unit/sec.c | 49 +++-- 41 files changed, 920 insertions(+), 251 deletions(-) create mode 100644 include/jemalloc/internal/guard.h create mode 100644 src/guard.c create mode 100644 test/include/test/arena_decay.h create mode 100644 test/include/test/guard.h create mode 100644 test/unit/guard.c create mode 100644 test/unit/guard.sh diff --git a/Makefile.in b/Makefile.in index a6f61ce..abd361f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ + $(srcroot)src/guard.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ @@ -218,6 +219,7 @@ TESTS_UNIT := \ ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ + ${srcroot}test/unit/guard.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/configure.ac b/configure.ac index 5a5887a..7e2b44c 100644 --- a/configure.ac +++ b/configure.ac @@ -2257,6 +2257,18 @@ else fi dnl ============================================================================ +dnl Check for mprotect(2). + +JE_COMPILABLE([mprotect(2)], [ +#include +], [ + mprotect((void *)0, 0, PROT_NONE); +], [je_cv_mprotect]) +if test "x${je_cv_mprotect}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ]) +fi + +dnl ============================================================================ dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll(). AC_CACHE_CHECK([for __builtin_clz], diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5410b16..fa81537 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -221,7 +221,8 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { * The cost is low enough (as edata will be accessed anyway) to be * enabled all the time. 
*/ - if (unlikely(edata_state_get(edata) != extent_state_active)) { + if (unlikely(edata == NULL || + edata_state_get(edata) != extent_state_active)) { safety_check_fail("Invalid deallocation detected: " "pages being freed (%p) not currently active, " "possibly caused by double free bugs.", diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index cc2752f..dd1bc32 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -2,12 +2,14 @@ #define JEMALLOC_INTERNAL_ECACHE_H #include "jemalloc/internal/eset.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/mutex.h" typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + eset_t guarded_eset; /* All stored extents must be in the same state. */ extent_state_t state; /* The index of the ehooks the ecache is associated with. */ @@ -21,17 +23,22 @@ struct ecache_s { static inline size_t ecache_npages_get(ecache_t *ecache) { - return eset_npages_get(&ecache->eset); + return eset_npages_get(&ecache->eset) + + eset_npages_get(&ecache->guarded_eset); } + /* Get the number of extents in the given page size index. */ static inline size_t ecache_nextents_get(ecache_t *ecache, pszind_t ind) { - return eset_nextents_get(&ecache->eset, ind); + return eset_nextents_get(&ecache->eset, ind) + + eset_nextents_get(&ecache->guarded_eset, ind); } + /* Get the sum total bytes of the extents in the given page size index. */ static inline size_t ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { - return eset_nbytes_get(&ecache->eset, ind); + return eset_nbytes_get(&ecache->eset, ind) + + eset_nbytes_get(&ecache->guarded_eset, ind); } static inline unsigned diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index ff14982..af039ea 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -98,12 +98,13 @@ struct edata_s { * c: committed * p: pai * z: zeroed + * g: guarded * t: state * i: szind * f: nfree * s: bin_shard * - * 00000000 ... 00000sss sssfffff fffffiii iiiiittt zpcbaaaa aaaaaaaa + * 00000000 ... 0000ssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -123,6 +124,9 @@ struct edata_s { * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. * + * guarded: The guarded flag is use by the sanitizer to track whether + * the extent has page guards around it. + * * state: The state flag is an extent_state_t. 
* * szind: The szind flag indicates usable size class index for @@ -158,8 +162,12 @@ struct edata_s { #define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_WIDTH 1 +#define EDATA_BITS_GUARDED_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_MASK MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) + #define EDATA_BITS_STATE_WIDTH 3 -#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) #define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) #define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) @@ -294,6 +302,12 @@ edata_state_get(const edata_t *edata) { } static inline bool +edata_guarded_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >> + EDATA_BITS_GUARDED_SHIFT); +} + +static inline bool edata_zeroed_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> EDATA_BITS_ZEROED_SHIFT); @@ -506,6 +520,12 @@ edata_state_set(edata_t *edata, extent_state_t state) { } static inline void +edata_guarded_set(edata_t *edata, bool guarded) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) | + ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); +} + +static inline void edata_zeroed_set(edata_t *edata, bool zeroed) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); @@ -588,6 +608,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_szind_set(edata, szind); edata_sn_set(edata, sn); edata_state_set(edata, state); + edata_guarded_set(edata, false); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); edata_pai_set(edata, pai); @@ -606,6 +627,7 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_szind_set(edata, SC_NSIZES); edata_sn_set(edata, sn); edata_state_set(edata, extent_state_active); + edata_guarded_set(edata, false); edata_zeroed_set(edata, true); edata_committed_set(edata, true); /* diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 064ecf5..8d9513e 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -63,6 +63,8 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, unsigned arena_ind); bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b); void ehooks_default_zero_impl(void *addr, size_t size); +void ehooks_default_guard_impl(void *guard1, void *guard2); +void ehooks_default_unguard_impl(void *guard1, void *guard2); /* * We don't officially support reentrancy from wtihin the extent hooks. But @@ -139,6 +141,15 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; } +static inline bool +ehooks_guard_will_fail(ehooks_t *ehooks) { + /* + * Before the guard hooks are officially introduced, limit the use to + * the default hooks only. + */ + return !ehooks_are_default(ehooks); +} + /* * Some hooks are required to return zeroed memory in certain situations. 
In * debug mode, we do some heuristic checks that they did what they were supposed @@ -368,4 +379,34 @@ ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { } } +static inline bool +ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { + bool err; + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_guard_impl(guard1, guard2); + err = false; + } else { + err = true; + } + + return err; +} + +static inline bool +ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { + bool err; + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_unguard_impl(guard1, guard2); + err = false; + } else { + err = true; + } + + return err; +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 03eebdd..73c5563 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,10 +21,10 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool zero); + bool zero, bool guarded); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool zero); + bool zero, bool guarded); void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/guard.h new file mode 100644 index 0000000..31f98c5 --- /dev/null +++ b/include/jemalloc/internal/guard.h @@ -0,0 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_GUARD_H +#define JEMALLOC_INTERNAL_GUARD_H + +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" + +#define PAGE_GUARDS_SIZE (2 * PAGE) + +#define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 +#define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 + +/* 0 means disabled, i.e. never guarded. */ +extern size_t opt_san_guard_large; +extern size_t opt_san_guard_small; + +void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void tsd_san_init(tsd_t *tsd); + +static inline bool +san_enabled(void) { + return (opt_san_guard_large != 0 || opt_san_guard_small != 0); +} + +static inline bool +large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, + size_t alignment) { + if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_large_get(tsd); + assert(n >= 1); + if (n > 1) { + /* + * Subtract conditionally because the guard may not happen due + * to alignment or size restriction below. 
+ */ + *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; + } + + if (n == 1 && (alignment <= PAGE) && + (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { + *tsd_san_extents_until_guard_largep_get(tsd) = + opt_san_guard_large; + return true; + } else { + assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); + return false; + } +} + +static inline bool +slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { + if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_small_get(tsd); + assert(n >= 1); + if (n == 1) { + *tsd_san_extents_until_guard_smallp_get(tsd) = + opt_san_guard_small; + return true; + } else { + *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; + assert(tsd_san_extents_until_guard_small_get(tsd) >= 1); + return false; + } +} + +#endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 78d1213..418b0cb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -312,6 +312,9 @@ */ #undef JEMALLOC_MADVISE_NOCORE +/* Defined if mprotect(2) is available. */ +#undef JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9783413..3cf370c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -167,7 +167,7 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool zero, + size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, bool *deferred_work_generated); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 035364e..3d7993d 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -110,5 +110,7 @@ bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); void pages_set_thp_state (void *ptr, size_t size); +void pages_mark_guards(void *head, void *tail); +void pages_unmark_guards(void *head, void *tail); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index ca5f616..f8f7d66 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,8 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, + bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer * than requested, in case of OOM). 
The list should already be @@ -37,8 +38,8 @@ struct pai_s { static inline edata_t * pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { - return self->alloc(tsdn, self, size, alignment, zero, + bool guarded, bool *deferred_work_generated) { + return self->alloc(tsdn, self, size, alignment, zero, guarded, deferred_work_generated); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index d22fdc9..86d5277 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -73,6 +73,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(peak_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ @@ -103,6 +105,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ + /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ /* arena_decay_ticker */ \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 597b247..75d6680 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d063a01..c5bb4cf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -61,6 +61,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 46633e8..d25768e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index d063a01..c5bb4cf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -61,6 +61,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index c2842c6..8147d14 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" @@ -327,9 +328,10 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; + bool guarded = large_extent_decide_guard(tsdn, arena_get_ehooks(arena), + esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, &deferred_work_generated); - + /* slab */ false, szind, zero, guarded, &deferred_work_generated); assert(deferred_work_generated == false); if (edata != NULL) { @@ -827,9 +829,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard 
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool guarded = slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, - &deferred_work_generated); + /* alignment */ PAGE, /* slab */ true, /* szind */ binind, + /* zero */ false, guarded, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); diff --git a/src/ecache.c b/src/ecache.c index 3c1a227..26fc211 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/guard.h" + bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { @@ -12,6 +14,8 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, ecache->ind = ind; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); + eset_init(&ecache->guarded_eset, state); + return false; } diff --git a/src/ehooks.c b/src/ehooks.c index 5d12d00..383e9de 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -244,6 +244,16 @@ ehooks_default_zero_impl(void *addr, size_t size) { } } +void +ehooks_default_guard_impl(void *guard1, void *guard2) { + pages_mark_guards(guard1, guard2); +} + +void +ehooks_default_unguard_impl(void *guard1, void *guard2) { + pages_unmark_guards(guard1, guard2); +} + const extent_hooks_t ehooks_default_extent_hooks = { ehooks_default_alloc, ehooks_default_dalloc, diff --git a/src/extent.c b/src/extent.c index 0400114..84ecd6b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -37,14 +37,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, - bool zero, bool *commit, bool growing_retained); + bool zero, bool *commit, bool growing_retained, bool guarded); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, - bool zero, bool *commit); + bool zero, bool *commit, bool guarded); static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -80,7 +80,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t * ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *expand_edata, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool guarded) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -88,14 +89,15 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, - size, alignment, zero, &commit, false); + size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } edata_t * 
ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *expand_edata, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool guarded) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -103,7 +105,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, - size, alignment, zero, &commit); + size, alignment, zero, &commit, guarded); if (edata == NULL) { if (opt_retain && expand_edata != NULL) { /* @@ -114,6 +116,14 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, */ return NULL; } + if (guarded) { + /* + * Means no cached guarded extents available (and no + * grow_retained was attempted). The pac_alloc flow + * will alloc regular extents to make new guarded ones. + */ + return NULL; + } void *new_addr = (expand_edata == NULL) ? NULL : edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, @@ -151,9 +161,19 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - edata = edata_list_inactive_first(&ecache->eset.lru); + eset_t *eset = &ecache->eset; + edata = edata_list_inactive_first(&eset->lru); if (edata == NULL) { - goto label_return; + /* + * Next check if there are guarded extents. They are + * more expensive to purge (since they are not + * mergeable), thus in favor of caching them longer. + */ + eset = &ecache->guarded_eset; + edata = edata_list_inactive_first(&eset->lru); + if (edata == NULL) { + goto label_return; + } } /* Check the eviction limit. */ size_t extents_npages = ecache_npages_get(ecache); @@ -161,7 +181,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = NULL; goto label_return; } - eset_remove(&ecache->eset, edata); + eset_remove(eset, edata); if (!ecache->delay_coalesce) { break; } @@ -234,17 +254,19 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); - eset_insert(&ecache->eset, edata); + eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset : + &ecache->eset; + eset_insert(eset, edata); } static void -extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, +extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state || edata_state_get(edata) == extent_state_merging); - eset_remove(&ecache->eset, edata); + eset_remove(eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } @@ -350,7 +372,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment) { + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool guarded) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(alignment > 0); if (config_debug && expand_edata != NULL) { @@ -366,6 +389,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t *edata; + eset_t *eset = guarded ? 
&ecache->guarded_eset : &ecache->eset; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); @@ -382,7 +406,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * If split and merge are not allowed (Windows w/o retain), try * exact fit only. */ - bool exact_only = (!maps_coalesce && !opt_retain); + bool exact_only = (!maps_coalesce && !opt_retain) || guarded; /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. When that small @@ -394,13 +418,13 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(&ecache->eset, size, alignment, exact_only, - lg_max_fit); + edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); } if (edata == NULL) { return NULL; } - extent_activate_locked(tsdn, pac, ecache, edata); + assert(!guarded || edata_guarded_get(edata)); + extent_activate_locked(tsdn, pac, ecache, eset, edata); return edata; } @@ -551,13 +575,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, - bool *commit, bool growing_retained) { + bool *commit, bool growing_retained, bool guarded) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(!guarded || expand_edata == NULL); malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment); + expand_edata, size, alignment, guarded); if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; @@ -734,7 +759,7 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, - bool *commit) { + bool *commit, bool guarded) { assert(size != 0); assert(alignment != 0); @@ -742,13 +767,13 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, expand_edata, size, alignment, zero, commit, - /* growing_retained */ true); + /* growing_retained */ true, guarded); if (edata != NULL) { malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } - } else if (opt_retain && expand_edata == NULL) { + } else if (opt_retain && expand_edata == NULL && !guarded) { edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. */ @@ -910,6 +935,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, emap_assert_mapped(tsdn, pac->emap, edata); + if (edata_guarded_get(edata)) { + goto label_skip_coalesce; + } if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL); @@ -931,6 +959,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return; } } +label_skip_coalesce: extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); @@ -981,6 +1010,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { + /* Restore guard pages for dalloc / unmap. 
*/ + if (edata_guarded_get(edata)) { + assert(ehooks_are_default(ehooks)); + unguard_pages(tsdn, ehooks, edata, pac->emap); + } /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. diff --git a/src/guard.c b/src/guard.c new file mode 100644 index 0000000..0723219 --- /dev/null +++ b/src/guard.c @@ -0,0 +1,63 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/tsd.h" + +/* The sanitizer options. */ +size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; +size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; + +void +guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + emap_deregister_boundary(tsdn, emap, edata); + + size_t size_with_guards = edata_size_get(edata); + size_t usize = size_with_guards - PAGE_GUARDS_SIZE; + + uintptr_t guard1 = (uintptr_t)edata_base_get(edata); + uintptr_t addr = guard1 + PAGE; + uintptr_t guard2 = addr + usize; + + assert(edata_state_get(edata) == extent_state_active); + ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the guarded addr and usable size of the edata. */ + edata_size_set(edata, usize); + edata_addr_set(edata, (void *)addr); + edata_guarded_set(edata, true); + + /* The new boundary will be registered on the pa_alloc path. */ +} + +void +unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + /* Remove the inner boundary which no longer exists. */ + emap_deregister_boundary(tsdn, emap, edata); + + size_t size = edata_size_get(edata); + size_t size_with_guards = size + PAGE_GUARDS_SIZE; + + uintptr_t addr = (uintptr_t)edata_base_get(edata); + uintptr_t guard1 = addr - PAGE; + uintptr_t guard2 = addr + size; + + assert(edata_state_get(edata) == extent_state_active); + ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the true addr and usable size of the edata. */ + edata_size_set(edata, size_with_guards); + edata_addr_set(edata, (void *)guard1); + edata_guarded_set(edata, false); + + /* Then re-register the outer boundary including the guards. 
*/ + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); +} + +void +tsd_san_init(tsd_t *tsd) { + *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; + *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; +} diff --git a/src/hpa.c b/src/hpa.c index d7422a3..82b9c99 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,7 +9,7 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -750,8 +750,9 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { + bool guarded, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); + assert(!guarded); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -796,7 +797,6 @@ hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - assert(!edata_slab_get(edata)); assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); @@ -865,6 +865,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { + assert(!edata_guarded_get(edata)); /* Just a dalloc_batch of size 1; this lets us share logic. 
*/ edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7ffa553..907265c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" @@ -1616,6 +1617,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + + CONF_HANDLE_SIZE_T(opt_san_guard_small, + "san_guard_small", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_san_guard_large, + "san_guard_large", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE diff --git a/src/pa.c b/src/pa.c index 249de24..649b9c2 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/hpa.h" static void @@ -118,15 +119,17 @@ pa_get_pai(pa_shard_t *shard, edata_t *edata) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool zero, bool *deferred_work_generated) { + bool slab, szind_t szind, bool zero, bool guarded, + bool *deferred_work_generated) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; *deferred_work_generated = false; - if (pa_shard_uses_hpa(shard)) { + if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero, deferred_work_generated); + zero, /* guarded */ false, deferred_work_generated); } /* * Fall back to the PAC if the HPA is off or couldn't serve the given @@ -134,10 +137,10 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ if (edata == NULL) { edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, - deferred_work_generated); + guarded, deferred_work_generated); } - if (edata != NULL) { + assert(edata_size_get(edata) == size); pa_nactive_add(shard, size >> LG_PAGE); emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); @@ -145,8 +148,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (slab && (size > 2 * PAGE)) { emap_register_interior(tsdn, shard->emap, edata, szind); } - } - if (edata != NULL) { assert(edata_arena_ind_get(edata) == shard->ind); } return edata; @@ -158,7 +159,9 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); - + if (edata_guarded_get(edata)) { + return true; + } size_t expand_amount = new_size - old_size; pai_t *pai = pa_get_pai(shard, edata); @@ -181,6 +184,9 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); + if (edata_guarded_get(edata)) { + return true; + } size_t shrink_amount = old_size - new_size; pai_t *pai = pa_get_pai(shard, edata); @@ -202,7 +208,10 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, 
emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (edata_slab_get(edata)) { emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); + /* + * The slab state of the extent isn't cleared. It may be used + * by the pai implementation, e.g. to make caching decisions. + */ } edata_addr_set(edata, edata_base_get(edata)); edata_szind_set(edata, SC_NSIZES); diff --git a/src/pac.c b/src/pac.c index 03e3197..8ce3159 100644 --- a/src/pac.c +++ b/src/pac.c @@ -2,9 +2,10 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/pac.h" +#include "jemalloc/internal/guard.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -109,28 +110,66 @@ pac_may_have_muzzy(pac_t *pac) { } static edata_t * -pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; - - *deferred_work_generated = false; +pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool zero, bool guarded) { + assert(!guarded || alignment <= PAGE); - ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - NULL, size, alignment, zero); + NULL, size, alignment, zero, guarded); if (edata == NULL && pac_may_have_muzzy(pac)) { edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - NULL, size, alignment, zero); + NULL, size, alignment, zero, guarded); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, NULL, size, alignment, zero); + &pac->ecache_retained, NULL, size, alignment, zero, + guarded); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&pac->stats->pac_mapped, size, ATOMIC_RELAXED); } } + + return edata; +} + +static edata_t * +pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool zero) { + assert(alignment <= PAGE); + + size_t size_with_guards = size + PAGE_GUARDS_SIZE; + /* Alloc a non-guarded extent first.*/ + edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, + /* alignment */ PAGE, zero, /* guarded */ false); + if (edata != NULL) { + /* Add guards around it. */ + assert(edata_size_get(edata) == size_with_guards); + guard_pages(tsdn, ehooks, edata, pac->emap); + } + assert(edata == NULL || (edata_guarded_get(edata) && + edata_size_get(edata) == size)); + + return edata; +} + +static edata_t * +pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero, bool guarded, bool *deferred_work_generated) { + *deferred_work_generated = false; + + pac_t *pac = (pac_t *)self; + ehooks_t *ehooks = pac_ehooks_get(pac); + + edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, + zero, guarded); + if (edata == NULL && guarded) { + /* No cached guarded extents; creating a new one. 
*/ + edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, + alignment, zero); + } + return edata; } @@ -149,15 +188,15 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - edata, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero, /* guarded*/ false); if (trail == NULL) { trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - edata, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero, /* guarded*/ false); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, edata, expand_amount, PAGE, - zero); + zero, /* guarded */ false); mapped_add = expand_amount; } if (trail == NULL) { @@ -203,6 +242,27 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); + + if (edata_guarded_get(edata)) { + /* + * Because cached guarded extents do exact fit only, large + * guarded extents are restored on dalloc eagerly (otherwise + * they will not be reused efficiently). Slab sizes have a + * limited number of size classes, and tend to cycle faster. + * + * In the case where coalesce is restrained (VirtualFree on + * Windows), guarded extents are also not cached -- otherwise + * during arena destroy / reset, the retained extents would not + * be whole regions (i.e. they are split between regular and + * guarded). + */ + if (!edata_slab_get(edata) || !maps_coalesce) { + assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || + !maps_coalesce); + unguard_pages(tsdn, ehooks, edata, pac->emap); + } + } + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); /* Purging of deallocated pages is deferred */ *deferred_work_generated = true; diff --git a/src/pages.c b/src/pages.c index 4261885..a8d9988 100644 --- a/src/pages.c +++ b/src/pages.c @@ -316,14 +316,10 @@ pages_unmap(void *addr, size_t size) { } static bool -pages_commit_impl(void *addr, size_t size, bool commit) { +os_pages_commit(void *addr, size_t size, bool commit) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); - if (os_overcommits) { - return true; - } - #ifdef _WIN32 return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); @@ -348,6 +344,15 @@ pages_commit_impl(void *addr, size_t size, bool commit) { #endif } +static bool +pages_commit_impl(void *addr, size_t size, bool commit) { + if (os_overcommits) { + return true; + } + + return os_pages_commit(addr, size, commit); +} + bool pages_commit(void *addr, size_t size) { return pages_commit_impl(addr, size, true); @@ -358,6 +363,38 @@ pages_decommit(void *addr, size_t size) { return pages_commit_impl(addr, size, false); } +void +pages_mark_guards(void *head, void *tail) { + assert(head != NULL && tail != NULL); + assert((uintptr_t)head < (uintptr_t)tail); +#ifdef JEMALLOC_HAVE_MPROTECT + mprotect(head, PAGE, PROT_NONE); + mprotect(tail, PAGE, PROT_NONE); +#else + /* Decommit sets to PROT_NONE / MEM_DECOMMIT. 
*/ + os_pages_commit(head, PAGE, false); + os_pages_commit(tail, PAGE, false); +#endif +} + +void +pages_unmark_guards(void *head, void *tail) { + assert(head != NULL && tail != NULL); + assert((uintptr_t)head < (uintptr_t)tail); +#ifdef JEMALLOC_HAVE_MPROTECT + size_t range = (uintptr_t)tail - (uintptr_t)head + PAGE; + if (range <= SC_LARGE_MINCLASS) { + mprotect(head, range, PROT_READ | PROT_WRITE); + } else { + mprotect(head, PAGE, PROT_READ | PROT_WRITE); + mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + } +#else + os_pages_commit(head, PAGE, true); + os_pages_commit(tail, PAGE, true); +#endif +} + bool pages_purge_lazy(void *addr, size_t size) { assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); diff --git a/src/pai.c b/src/pai.c index e863a9b..86b8ee5 100644 --- a/src/pai.c +++ b/src/pai.c @@ -7,7 +7,7 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, &deferred_by_alloc); *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index c6f611f..0f95a0d 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -218,8 +218,9 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { + bool guarded, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); + assert(!guarded); sec_t *sec = (sec_t *)self; *deferred_work_generated = false; @@ -227,7 +228,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - deferred_work_generated); + /* guarded */ false, deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -250,7 +251,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, deferred_work_generated); + zero, /* guarded */ false, deferred_work_generated); } } return edata; diff --git a/src/tsd.c b/src/tsd.c index 6820eb6..31ff2f2 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -242,6 +243,7 @@ tsd_data_init(tsd_t *tsd) { rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); tsd_prng_state_init(tsd); tsd_te_init(tsd); /* event_init may use the prng state above. 
*/ + tsd_san_init(tsd); return tsd_tcache_enabled_data_init(tsd); } @@ -269,6 +271,7 @@ tsd_data_init_nocleanup(tsd_t *tsd) { *tsd_reentrancy_levelp_get(tsd) = 1; tsd_prng_state_init(tsd); tsd_te_init(tsd); /* event_init may use the prng state above. */ + tsd_san_init(tsd); assert_tsd_data_cleanup_done(tsd); return false; diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_decay.h new file mode 100644 index 0000000..da65921 --- /dev/null +++ b/test/include/test/arena_decay.h @@ -0,0 +1,149 @@ +static unsigned +do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_epoch(void) { + uint64_t epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +static void +do_purge(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_decay(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static uint64_t +get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + uint64_t npurge = 0; + size_t sz = sizeof(npurge); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); + return npurge; +} + +static uint64_t +get_arena_dirty_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); +} + +static uint64_t +get_arena_dirty_purged(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); +} + +static uint64_t +get_arena_muzzy_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static size_t +get_arena_pmuzzy(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pmuzzy; + size_t sz = sizeof(pmuzzy); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pmuzzy; +} + +static void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = do_mallocx(size, flags); + dallocx(p, flags); +} + diff --git a/test/include/test/guard.h b/test/include/test/guard.h new file mode 100644 index 0000000..691dc50 --- /dev/null +++ b/test/include/test/guard.h @@ -0,0 +1,6 @@ +static inline bool +extent_is_guarded(tsdn_t *tsdn, void *ptr) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + return edata_guarded_get(edata); +} + diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 9fca538..bbfd23a 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -1,4 +1,5 @@ #include "test/jemalloc_test.h" +#include "test/arena_decay.h" #include "jemalloc/internal/ticker.h" @@ -22,155 +23,6 @@ nstime_update_mock(nstime_t *time) { } } -static unsigned -do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void 
*)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - return arena_ind; -} - -static void -do_arena_destroy(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_epoch(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); -} - -void -do_purge(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_decay(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static uint64_t -get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - uint64_t npurge = 0; - size_t sz = sizeof(npurge); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); - return npurge; -} - -static uint64_t -get_arena_dirty_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); -} - -static uint64_t -get_arena_dirty_purged(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); -} - -static uint64_t -get_arena_muzzy_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static uint64_t -get_arena_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static size_t -get_arena_pdirty(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pdirty; - size_t sz = sizeof(pdirty); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pdirty; -} - -static size_t -get_arena_pmuzzy(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pmuzzy; - size_t sz = sizeof(pmuzzy); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pmuzzy; -} - -static void * -do_mallocx(size_t size, int flags) { - void *p = mallocx(size, flags); - 
-	expect_ptr_not_null(p, "Unexpected mallocx() failure");
-	return p;
-}
-
-static void
-generate_dirty(unsigned arena_ind, size_t size) {
-	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
-	void *p = do_mallocx(size, flags);
-	dallocx(p, flags);
-}
-
 TEST_BEGIN(test_decay_ticks) {
 	test_skip_if(is_background_thread_enabled());
 	test_skip_if(opt_hpa);
diff --git a/test/unit/double_free.c b/test/unit/double_free.c
index 73155b9..f98484c 100644
--- a/test/unit/double_free.c
+++ b/test/unit/double_free.c
@@ -1,4 +1,5 @@
 #include "test/jemalloc_test.h"
+#include "test/guard.h"
 
 #include "jemalloc/internal/safety_check.h"
 
@@ -30,8 +31,18 @@
 TEST_BEGIN(test_large_double_free_tcache) {
 	test_large_double_free_pre();
 	char *ptr = malloc(SC_LARGE_MINCLASS);
+	bool guarded = extent_is_guarded(tsdn_fetch(), ptr);
 	free(ptr);
-	free(ptr);
+	if (!guarded) {
+		free(ptr);
+	} else {
+		/*
+		 * Skip because guarded extents may unguard immediately on
+		 * deallocation, in which case the second free will crash before
+		 * reaching the intended safety check.
+		 */
+		fake_abort_called = true;
+	}
 	mallctl("thread.tcache.flush", NULL, NULL, NULL, 0);
 	test_large_double_free_post();
 }
@@ -43,8 +54,18 @@
 TEST_BEGIN(test_large_double_free_no_tcache) {
 	test_large_double_free_pre();
 	char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE);
+	bool guarded = extent_is_guarded(tsdn_fetch(), ptr);
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
-	dallocx(ptr, MALLOCX_TCACHE_NONE);
+	if (!guarded) {
+		dallocx(ptr, MALLOCX_TCACHE_NONE);
+	} else {
+		/*
+		 * Skip because guarded extents may unguard immediately on
+		 * deallocation, in which case the second free will crash before
+		 * reaching the intended safety check.
+		 */
+		fake_abort_called = true;
+	}
 	test_large_double_free_post();
 }
 TEST_END
diff --git a/test/unit/guard.c b/test/unit/guard.c
new file mode 100644
index 0000000..43381e4
--- /dev/null
+++ b/test/unit/guard.c
@@ -0,0 +1,201 @@
+#include "test/jemalloc_test.h"
+#include "test/arena_decay.h"
+#include "test/guard.h"
+
+#include "jemalloc/internal/guard.h"
+
+static void
+verify_extent_guarded(tsdn_t *tsdn, void *ptr) {
+	expect_true(extent_is_guarded(tsdn, ptr),
+	    "All extents should be guarded.");
+}
+
+#define MAX_SMALL_ALLOCATIONS 4096
+void *small_alloc[MAX_SMALL_ALLOCATIONS];
+
+TEST_BEGIN(test_guarded_small) {
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	unsigned npages = 16, pages_found = 0, ends_found = 0;
+	VARIABLE_ARRAY(uintptr_t, pages, npages);
+
+	/* Allocate to get sanitized pointers. */
+	size_t sz = PAGE / 8;
+	unsigned n_alloc = 0;
+	while (n_alloc < MAX_SMALL_ALLOCATIONS) {
+		void *ptr = malloc(sz);
+		expect_ptr_not_null(ptr, "Unexpected malloc() failure");
+		small_alloc[n_alloc] = ptr;
+		verify_extent_guarded(tsdn, ptr);
+		if ((uintptr_t)ptr % PAGE == 0) {
+			pages[pages_found++] = (uintptr_t)ptr;
+		}
+		if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) {
+			ends_found++;
+		}
+		n_alloc++;
+		if (pages_found == npages && ends_found == npages) {
+			break;
+		}
+	}
+	/* Should find the ptrs being checked for overflow and underflow. */
+	expect_u_eq(pages_found, npages, "Could not find the expected pages.");
+	expect_u_eq(ends_found, npages, "Could not find the expected pages.");
+
+	/* Verify the pages are not contiguous, i.e. separated by guards. */
+	for (unsigned i = 0; i < npages - 1; i++) {
+		for (unsigned j = i + 1; j < npages; j++) {
+			uintptr_t ptr_diff = pages[i] > pages[j] ?
+			    pages[i] - pages[j] : pages[j] - pages[i];
+			expect_zu_gt((size_t)ptr_diff, 2 * PAGE,
+			    "Pages should not be next to each other.");
+		}
+	}
+
+	for (unsigned i = 0; i < n_alloc + 1; i++) {
+		free(small_alloc[i]);
+	}
+}
+TEST_END
+
+TEST_BEGIN(test_guarded_large) {
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	unsigned nlarge = 32;
+	VARIABLE_ARRAY(uintptr_t, large, nlarge);
+
+	/* Allocate to get sanitized pointers. */
+	size_t large_sz = SC_LARGE_MINCLASS;
+	for (unsigned i = 0; i < nlarge; i++) {
+		void *ptr = malloc(large_sz);
+		verify_extent_guarded(tsdn, ptr);
+		expect_ptr_not_null(ptr, "Unexpected malloc() failure");
+		large[i] = (uintptr_t)ptr;
+	}
+
+	/* Verify the pages are not contiguous, i.e. separated by guards. */
+	uintptr_t min_diff = (uintptr_t)-1;
+	for (unsigned i = 0; i < nlarge; i++) {
+		for (unsigned j = i + 1; j < nlarge; j++) {
+			uintptr_t ptr_diff = large[i] > large[j] ?
+			    large[i] - large[j] : large[j] - large[i];
+			expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE,
+			    "Pages should not be next to each other.");
+			if (ptr_diff < min_diff) {
+				min_diff = ptr_diff;
+			}
+		}
+	}
+	expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE,
+	    "Pages should not be next to each other.");
+
+	for (unsigned i = 0; i < nlarge; i++) {
+		free((void *)large[i]);
+	}
+}
+TEST_END
+
+static void
+verify_pdirty(unsigned arena_ind, uint64_t expected) {
+	uint64_t pdirty = get_arena_pdirty(arena_ind);
+	expect_u64_eq(pdirty, expected / PAGE,
+	    "Unexpected dirty page amount.");
+}
+
+static void
+verify_pmuzzy(unsigned arena_ind, uint64_t expected) {
+	uint64_t pmuzzy = get_arena_pmuzzy(arena_ind);
+	expect_u64_eq(pmuzzy, expected / PAGE,
+	    "Unexpected muzzy page amount.");
+}
+
+TEST_BEGIN(test_guarded_decay) {
+	unsigned arena_ind = do_arena_create(-1, -1);
+	do_decay(arena_ind);
+	do_purge(arena_ind);
+
+	verify_pdirty(arena_ind, 0);
+	verify_pmuzzy(arena_ind, 0);
+
+	/* Verify that guarded extents are counted as dirty. */
+	size_t sz1 = PAGE, sz2 = PAGE * 2;
+	/* W/o maps_coalesce, guarded extents are unguarded eagerly. */
+	size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE;
+	generate_dirty(arena_ind, sz1);
+	verify_pdirty(arena_ind, sz1 + add_guard_size);
+	verify_pmuzzy(arena_ind, 0);
+
+	/* Should reuse the first extent. */
+	generate_dirty(arena_ind, sz1);
+	verify_pdirty(arena_ind, sz1 + add_guard_size);
+	verify_pmuzzy(arena_ind, 0);
+
+	/* Should not reuse; expect new dirty pages. */
+	generate_dirty(arena_ind, sz2);
+	verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size);
+	verify_pmuzzy(arena_ind, 0);
+
+	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
+	int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE;
+
+	/* Should reuse dirty extents for the two mallocx.
*/ + void *p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + verify_pdirty(arena_ind, sz2 + add_guard_size); + + void *p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + dallocx(p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + + do_purge(arena_ind); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + if (config_stats) { + expect_u64_eq(get_arena_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_purged(arena_ind), + (sz1 + sz2 + 2 * add_guard_size) / PAGE, + "Expected purging to occur"); + expect_u64_eq(get_arena_muzzy_npurge(arena_ind), 0, + "Expected purging to occur"); + } + + if (opt_retain) { + /* + * With retain, guarded extents are not mergable and will be + * cached in ecache_retained. They should be reused. + */ + void *new_p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + expect_ptr_eq(p1, new_p1, "Expect to reuse p1"); + + void *new_p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + expect_ptr_eq(p2, new_p2, "Expect to reuse p2"); + + dallocx(new_p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(new_p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + } + + do_arena_destroy(arena_ind); +} +TEST_END + +int +main(void) { + return test( + test_guarded_small, + test_guarded_large, + test_guarded_decay); +} diff --git a/test/unit/guard.sh b/test/unit/guard.sh new file mode 100644 index 0000000..933b4a4 --- /dev/null +++ b/test/unit/guard.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="san_guard_large:1,san_guard_small:1" diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 86012c7..060ce3e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -80,11 +80,11 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated; - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -188,7 +188,7 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false, + npages * PAGE, PAGE, false, false, &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); @@ -263,7 +263,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { */ for (size_t i = 0; i < NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; @@ -299,7 +300,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { /* Reallocate (individually), and ensure reuse and contiguity. 
*/ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -374,7 +376,7 @@ TEST_BEGIN(test_defer_time) { edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -408,7 +410,7 @@ TEST_BEGIN(test_defer_time) { */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 77d0555..5976bb4 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -128,6 +128,8 @@ TEST_BEGIN(test_hpa_background_thread_purges) { test_skip_if(!config_stats); test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); + /* Skip since guarded pages cannot be allocated from hpa. */ + test_skip_if(san_enabled()); unsigned arena_ind = create_arena(); /* @@ -142,6 +144,8 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { test_skip_if(!config_stats); test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); + /* Skip since guarded pages cannot be allocated from hpa. */ + test_skip_if(san_enabled()); unsigned arena_ind = create_arena(); diff --git a/test/unit/pa.c b/test/unit/pa.c index 01d891d..fcf2223 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -91,7 +91,7 @@ do_alloc_free_purge(void *arg) { bool deferred_work_generated; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); assert_ptr_not_null(edata, ""); pa_dalloc(TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); diff --git a/test/unit/retained.c b/test/unit/retained.c index 9ad9940..53cda28 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -1,5 +1,6 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/spin.h" static unsigned arena_ind; @@ -103,7 +104,8 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - esz = sz + sz_large_pad; + size_t guard_sz = san_enabled() ? 
PAGE_GUARDS_SIZE : 0; + esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); @@ -133,7 +135,8 @@ TEST_BEGIN(test_retained) { */ do_refresh(); - size_t allocated = esz * nthreads * PER_THD_NALLOCS; + size_t allocated = (esz - guard_sz) * nthreads * + PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); diff --git a/test/unit/sec.c b/test/unit/sec.c index 763e608..acca192 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,7 +50,9 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated) { + size_t alignment, bool zero, bool guarded, + bool *deferred_work_generated) { + assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; *deferred_work_generated = false; if (ta->alloc_fail) { @@ -182,10 +184,12 @@ TEST_BEGIN(test_reuse) { /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -216,9 +220,11 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -255,11 +261,12 @@ TEST_BEGIN(test_auto_flush) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -310,7 +317,8 @@ do_disable_flush_test(bool is_disable) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. 
*/ @@ -383,7 +391,8 @@ TEST_BEGIN(test_max_alloc_respected) { expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false, &deferred_work_generated); + PAGE, /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); @@ -410,7 +419,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, @@ -450,7 +460,8 @@ TEST_BEGIN(test_nshards_0) { bool deferred_work_generated; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); /* Both operations should have gone directly to the fallback. */ @@ -492,7 +503,8 @@ TEST_BEGIN(test_stats_simple) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } @@ -505,7 +517,8 @@ TEST_BEGIN(test_stats_simple) { } for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } } @@ -534,13 +547,14 @@ TEST_BEGIN(test_stats_auto_flush) { bool deferred_work_generated; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -580,7 +594,8 @@ TEST_BEGIN(test_stats_manual_flush) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } -- cgit v0.12 From 3c4b717ffc05012905fec0c4b49cda8f783c2727 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Sep 2021 13:19:37 -0700 Subject: Remove unused header base_structs.h. 
--- include/jemalloc/internal/base_structs.h | 62 -------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 include/jemalloc/internal/base_structs.h diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h deleted file mode 100644 index 914c5b5..0000000 --- a/include/jemalloc/internal/base_structs.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H -#define JEMALLOC_INTERNAL_BASE_STRUCTS_H - -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/edata.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/sc.h" - -/* Embedded at the beginning of every block of base-managed virtual memory. */ -struct base_block_s { - /* Total size of block's virtual memory mapping. */ - size_t size; - - /* Next block in list of base's blocks. */ - base_block_t *next; - - /* Tracks unused trailing space. */ - edata_t edata; -}; - -struct base_s { - /* - * User-configurable extent hook functions. - */ - ehooks_t ehooks; - - /* - * Use user hooks for metadata when true. - */ - bool metadata_use_hooks; - - /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; - - /* Using THP when true (metadata_thp auto mode). */ - bool auto_thp_switched; - /* - * Most recent size class in the series of increasingly large base - * extents. Logarithmic spacing between subsequent allocations ensures - * that the total number of distinct mappings remains small. - */ - pszind_t pind_last; - - /* Serial number generation state. */ - size_t extent_sn_next; - - /* Chain of all blocks associated with base. */ - base_block_t *blocks; - - /* Heap of extents that track unused trailing space within blocks. */ - edata_heap_t avail[SC_NSIZES]; - - /* Stats, only maintained if config_stats. */ - size_t allocated; - size_t resident; - size_t mapped; - /* Number of THP regions touched. */ - size_t n_thp; -}; - -#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ -- cgit v0.12 From 83f3294027952710f35014cff1cffd51f281d785 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Sep 2021 13:43:24 -0700 Subject: Small refactors around 7bb05e0. 
--- include/jemalloc/internal/arena_structs.h | 6 +++--- include/jemalloc/internal/arena_types.h | 2 +- .../jemalloc/internal/jemalloc_internal_inlines_a.h | 3 +-- src/base.c | 6 +++--- src/jemalloc.c | 3 +-- test/unit/pa.c | 4 ++-- test/unit/rtree.c | 20 ++++++++++---------- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ad76a79..e2a5a40 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_H #include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" @@ -98,4 +98,4 @@ struct arena_s { bin_t bins[0]; }; -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index f763a8c..d0e1291 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -43,7 +43,7 @@ typedef enum { struct arena_config_s { /* extent hooks to be used for the arena */ - struct extent_hooks_s *extent_hooks; + extent_hooks_t *extent_hooks; /* * Use extent hooks for metadata (base) allocations when true. diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 1bca34c..9e27cc3 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -65,8 +65,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); if (unlikely(ret == NULL)) { if (init_if_missing) { - ret = arena_init(tsdn, ind, - &arena_config_default); + ret = arena_init(tsdn, ind, &arena_config_default); } } return ret; diff --git a/src/base.c b/src/base.c index cc127ea..38f6fa4 100644 --- a/src/base.c +++ b/src/base.c @@ -298,7 +298,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, static ehooks_t * base_ehooks_get_for_metadata(base_t *base) { return base->metadata_use_hooks ? &base->ehooks : - (struct ehooks_s *)&ehooks_default_extent_hooks; + (ehooks_t *)&ehooks_default_extent_hooks; } /* @@ -522,7 +522,7 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks, true); + b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); return (b0 == NULL); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 907265c..8e04fa6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -570,8 +570,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { /* Initialize a new arena. 
*/ choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), - choose[j], - &arena_config_default); + choose[j], &arena_config_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); diff --git a/test/unit/pa.c b/test/unit/pa.c index fcf2223..10fa1b2 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -53,8 +53,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { assert_ptr_not_null(test_data, ""); init_test_extent_hooks(&test_data->hooks); - base_t *base = base_new(TSDN_NULL, /* ind */ 1, - &test_data->hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks, + /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data->base = base; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 82b617b..4101b72 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -12,8 +12,8 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -53,8 +53,8 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -105,8 +105,8 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, @@ -155,8 +155,8 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; @@ -254,8 +254,8 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, TEST_BEGIN(test_rtree_range) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; -- cgit v0.12 From 11b6db7448f9c31502a7bcf7e59cd8913732c83d Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 25 Oct 2020 15:48:41 +0000 Subject: CPU affinity on BSD platforms support. 
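The one-line subject covers three distinct affinity APIs that the diff below has to juggle: Linux's sched_setaffinity(2) with a fixed-size cpu_set_t, the FreeBSD-style pthread_setaffinity_np(3) with a cpuset_t that still supports CPU_ZERO/CPU_SET, and NetBSD, where the set is an opaque, heap-allocated object. A minimal NetBSD-only sketch of that last, least familiar variant (a standalone illustration, not jemalloc code; the helper name is made up, and unlike the diff it also marks the target CPU with cpuset_set()):

#include <pthread.h>
#include <sched.h>

/* Hypothetical helper: pin the calling thread to `cpu` on NetBSD. */
static int
pin_self_to_cpu(int cpu) {
        cpuset_t *set = cpuset_create();        /* opaque, dynamically sized */
        if (set == NULL) {
                return -1;
        }
        cpuset_set((cpuset_id_t)cpu, set);      /* mark the target CPU */
        int err = pthread_setaffinity_np(pthread_self(), cpuset_size(set),
            set);
        cpuset_destroy(set);                    /* must be released explicitly */
        return err;
}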
--- src/background_thread.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index ac171c3..3bb8d26 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -82,13 +82,33 @@ static inline bool set_current_thread_affinity(int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; +#else +# ifndef __NetBSD__ + cpuset_t cpuset; +# else + cpuset_t *cpuset; +# endif +#endif + +#ifndef __NetBSD__ CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); - int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); +#else + cpuset = cpuset_create(); +#endif - return (ret != 0); +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + return (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0); #else - return false; +# ifndef __NetBSD__ + int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), + &cpuset); +# else + int ret = pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset), + cpuset); + cpuset_destroy(cpuset); +# endif + return ret != 0; #endif } -- cgit v0.12 From ab0f1604b4fc563158f142d41f6a3550463d7729 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Sep 2021 15:41:10 -0700 Subject: Delay the atexit call to prof_log_start(). So that atexit() is only done when prof_log is used. --- src/prof_log.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index 0f27a12..0632c3b 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -412,6 +412,13 @@ prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + JEMALLOC_COLD bool prof_log_start(tsdn_t *tsdn, const char *filename) { @@ -425,6 +432,20 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { malloc_mutex_lock(tsdn, &log_mtx); + static bool prof_log_atexit_called = false; + if (!prof_log_atexit_called) { + prof_log_atexit_called = true; + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + ret = true; + goto label_done; + } + } + if (prof_logging_state != prof_logging_state_stopped) { ret = true; } else if (filename == NULL) { @@ -442,19 +463,12 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { if (!ret) { nstime_prof_init_update(&log_start_timestamp); } - +label_done: malloc_mutex_unlock(tsdn, &log_mtx); return ret; } -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - struct prof_emitter_cb_arg_s { int fd; ssize_t ret; @@ -697,15 +711,6 @@ prof_log_init(tsd_t *tsd) { prof_log_start(tsd_tsdn(tsd), NULL); } - if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - return true; - } - return false; } -- cgit v0.12 From cf9724531af2864b243668d82aa63114e9737bfd Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 17 Feb 2021 20:40:11 +0000 Subject: Darwin malloc_size override support proposal. Darwin has similar api than Linux/FreeBSD's malloc_usable_size. 
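With this change, a configure probe (JEMALLOC_HAVE_MALLOC_SIZE) adds je_malloc_size() to the exported symbols where the platform provides malloc_size(), and the test harness switches to a TEST_MALLOC_SIZE macro so the same tests query usable size through whichever interface is native. A standalone sketch of the same portable-wrapper idea for application code (assumes only the platform headers named below; not part of the patch):

#include <stdlib.h>
#ifdef __APPLE__
#  include <malloc/malloc.h>    /* malloc_size() */
#else
#  include <malloc.h>           /* malloc_usable_size() on glibc */
#endif

/* Query the usable size of an allocation via the platform's native call. */
static size_t
usable_size(void *ptr) {
#ifdef __APPLE__
        return malloc_size(ptr);
#else
        return malloc_usable_size(ptr);
#endif
}

When jemalloc is the active allocator with this patch applied, both branches should resolve to jemalloc's own implementation and report its usable size.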
--- configure.ac | 3 ++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 ++++ include/jemalloc/jemalloc_protos.h.in | 4 +++ src/jemalloc.c | 35 ++++++++++++++++------ test/include/test/jemalloc_test.h.in | 5 ++++ test/integration/aligned_alloc.c | 4 +-- test/integration/allocated.c | 2 +- test/integration/malloc.c | 2 +- test/integration/posix_memalign.c | 2 +- test/integration/rallocx.c | 4 +-- test/stress/microbench.c | 2 +- test/unit/junk.c | 2 +- test/unit/prof_stats.c | 2 +- 13 files changed, 53 insertions(+), 19 deletions(-) diff --git a/configure.ac b/configure.ac index 7e2b44c..7a49e84 100644 --- a/configure.ac +++ b/configure.ac @@ -1056,6 +1056,9 @@ AC_CHECK_FUNC([memalign], AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) +AC_CHECK_FUNC([malloc_size], + [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ]) + public_syms="${public_syms} malloc_size"]) dnl Check for allocator-related functions that should be wrapped. wrap_syms= diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 418b0cb..a4be549 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -337,6 +337,11 @@ */ #undef JEMALLOC_HAVE_MEMCNTL +/* + * Defined if malloc_size is supported + */ +#undef JEMALLOC_HAVE_MALLOC_SIZE + /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index d75b222..356221c 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -53,6 +53,10 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@malloc_stats_print( const char *opts); JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_size( + const void *ptr); +#endif #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN diff --git a/src/jemalloc.c b/src/jemalloc.c index 8e04fa6..469a491 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3904,18 +3904,14 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } #undef STATS_PRINT_BUFSIZE -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { - size_t ret; - tsdn_t *tsdn; - - LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); - +JEMALLOC_ALWAYS_INLINE size_t +je_malloc_usable_size_impl(JEMALLOC_USABLE_SIZE_CONST void *ptr) { assert(malloc_initialized() || IS_INITIALIZER); - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); + size_t ret; if (unlikely(ptr == NULL)) { ret = 0; } else { @@ -3926,12 +3922,33 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { ret = isalloc(tsdn, ptr); } } - check_entry_exit_locking(tsdn); + + return ret; +} + +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); + + size_t ret = je_malloc_usable_size_impl(ptr); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +je_malloc_size(const void *ptr) { + LOG("core.malloc_size.entry", "ptr: %p", ptr); + + size_t ret = je_malloc_usable_size_impl(ptr); + + 
LOG("core.malloc_size.exit", "result: %zu", ret); + return ret; +} +#endif + static void batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { assert(config_prof && opt_prof); diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0e33216..3f8c0da 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -132,6 +132,11 @@ static const bool config_debug = #define MEXP 19937 #include "test/SFMT.h" +#ifndef JEMALLOC_HAVE_MALLOC_SIZE +#define TEST_MALLOC_SIZE malloc_usable_size +#else +#define TEST_MALLOC_SIZE malloc_size +#endif /******************************************************************************/ /* * Define always-enabled assertion macros, so that test assertions execute even diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 3f619e7..b37d5ba 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -120,7 +120,7 @@ TEST_BEGIN(test_alignment_and_size) { "size=%zu (%#zx): %s", alignment, size, size, buf); } - total += malloc_usable_size(ps[i]); + total += TEST_MALLOC_SIZE(ps[i]); if (total >= (MAXALIGN << 1)) { break; } @@ -141,7 +141,7 @@ TEST_END TEST_BEGIN(test_zero_alloc) { void *res = aligned_alloc(8, 0); assert(res); - size_t usable = malloc_usable_size(res); + size_t usable = TEST_MALLOC_SIZE(res); assert(usable > 0); free(res); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 8f2f21d..0c64272 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -70,7 +70,7 @@ thd_start(void *arg) { expect_ptr_eq(ap0, ap1, "Pointer returned by \"thread.allocatedp\" should not change"); - usize = malloc_usable_size(p); + usize = TEST_MALLOC_SIZE(p); expect_u64_le(a0 + usize, a1, "Allocated memory counter should increase by at least the amount " "explicitly allocated"); diff --git a/test/integration/malloc.c b/test/integration/malloc.c index 8b33bc8..ef44916 100644 --- a/test/integration/malloc.c +++ b/test/integration/malloc.c @@ -3,7 +3,7 @@ TEST_BEGIN(test_zero_alloc) { void *res = malloc(0); assert(res); - size_t usable = malloc_usable_size(res); + size_t usable = TEST_MALLOC_SIZE(res); assert(usable > 0); free(res); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 6f8a1b0..2da0549 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -101,7 +101,7 @@ TEST_BEGIN(test_alignment_and_size) { "size=%zu (%#zx): %s", alignment, size, size, buf); } - total += malloc_usable_size(ps[i]); + total += TEST_MALLOC_SIZE(ps[i]); if (total >= (MAXALIGN << 1)) { break; } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 57c7967..d4a48fc 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -185,7 +185,7 @@ TEST_BEGIN(test_align_enum) { assert_ptr_not_null(p, "Unexpected mallocx() error"); assert_zu_eq(nallocx(1, flags), - malloc_usable_size(p), + TEST_MALLOC_SIZE(p), "Wrong mallocx() usable size"); int flags_next = MALLOCX_LG_ALIGN(lg_align_next); @@ -193,7 +193,7 @@ TEST_BEGIN(test_align_enum) { assert_ptr_not_null(p, "Unexpected rallocx() error"); expect_zu_eq(nallocx(size, flags_next), - malloc_usable_size(p), + TEST_MALLOC_SIZE(p), "Wrong rallocx() usable size"); free(p); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 226677f..062e32f 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -69,7 +69,7 @@ 
malloc_mus_free(void) { test_fail("Unexpected malloc() failure"); return; } - malloc_usable_size(p); + TEST_MALLOC_SIZE(p); free(p); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 314da3c..543092f 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -30,7 +30,7 @@ do_allocs(size_t size, bool zero, size_t lg_align) { if (opt_junk_alloc && !zero) { \ expect_ptr_eq(ptr, last_junked_ptr, ""); \ expect_zu_eq(last_junked_usize, \ - malloc_usable_size(ptr), ""); \ + TEST_MALLOC_SIZE(ptr), ""); \ } \ } while (0) if (!zero && lg_align == 0) { diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index a914587..c88c4ae 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -43,7 +43,7 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], int flags = flags_array[i]; void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); - assert(malloc_usable_size(p) == sz_index2size(ind)); + assert(TEST_MALLOC_SIZE(p) == sz_index2size(ind)); ptrs[i] = p; live_req_sum += sz; live_count++; -- cgit v0.12 From 912324a1acae4bfb6445825caad000aa295dcca8 Mon Sep 17 00:00:00 2001 From: Stan Angelov Date: Thu, 30 Sep 2021 17:37:59 -0700 Subject: Add debug check outside of the loop in hpa_alloc_batch. This optimizes the whole loop away for non-debug builds. --- src/hpa.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 82b9c99..24fb7a3 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -733,17 +733,25 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata; - ql_foreach(edata, &results->head, ql_link_active) { - emap_assert_mapped(tsdn, shard->emap, edata); - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_state_get(edata) == extent_state_active); - assert(edata_arena_ind_get(edata) == shard->ind); - assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - assert(!edata_slab_get(edata)); - assert(edata_committed_get(edata)); - assert(edata_base_get(edata) == edata_addr_get(edata)); - assert(edata_base_get(edata) != NULL); + /* + * Guard the sanity checks with config_debug because the loop cannot be + * proven non-circular by the compiler, even if everything within the + * loop is optimized away. + */ + if (config_debug) { + edata_t *edata; + ql_foreach(edata, &results->head, ql_link_active) { + emap_assert_mapped(tsdn, shard->emap, edata); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == + SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_base_get(edata) != NULL); + } } return nsuccess; } -- cgit v0.12 From c9ebff0fd6ab90d5eed0d11f48dfedcc21222ab0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 6 Oct 2021 15:22:38 -0700 Subject: Initialize deferred_work_generated As the code evolves, some code paths that have previously assigned deferred_work_generated may cease being reached. This would leave the value uninitialized. This change initializes the value for safety. 
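The hazard being closed here is a plain uninitialized out-parameter: the pai/pa allocation paths report whether deferred work was generated through a bool *, and a callee that returns early may never store through it, leaving the caller to branch on an indeterminate value. A minimal standalone illustration of why the callers below now set the flag at its declaration (toy functions, not jemalloc code):

#include <stdbool.h>
#include <stdlib.h>

/* Callee that can return early without touching the out-parameter. */
static void *
try_alloc(size_t size, bool *deferred_work_generated) {
        if (size == 0) {
                return NULL;    /* early exit: the flag is left untouched */
        }
        *deferred_work_generated = true;
        return malloc(size);
}

static void
caller(void) {
        bool deferred_work_generated = false;   /* safe default, as in this commit */
        void *p = try_alloc(0, &deferred_work_generated);
        if (deferred_work_generated) {
                /* ... schedule background work ... */
        }
        free(p);
}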
--- src/arena.c | 6 +++--- src/hpa.c | 2 -- src/large.c | 6 +++--- src/pa.c | 1 - src/pac.c | 6 ------ src/sec.c | 8 +++----- test/unit/hpa.c | 8 ++++---- test/unit/pa.c | 2 +- test/unit/sec.c | 24 +++++++++--------------- 9 files changed, 23 insertions(+), 40 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8147d14..811f0ed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -324,7 +324,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - bool deferred_work_generated; + bool deferred_work_generated = false; szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; @@ -561,7 +561,7 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - bool deferred_work_generated; + bool deferred_work_generated = false; pa_dalloc(tsdn, &arena->pa_shard, slab, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); @@ -825,7 +825,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { - bool deferred_work_generated; + bool deferred_work_generated = false; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/hpa.c b/src/hpa.c index 24fb7a3..5251655 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -785,7 +785,6 @@ static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { /* Expand not yet supported. */ - *deferred_work_generated = false; return true; } @@ -793,7 +792,6 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. 
*/ - *deferred_work_generated = false; return true; } diff --git a/src/large.c b/src/large.c index 6dbb3d9..5fc4bf5 100644 --- a/src/large.c +++ b/src/large.c @@ -64,7 +64,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - bool deferred_work_generated; + bool deferred_work_generated = false; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, usize + sz_large_pad, sz_size2index(usize), &deferred_work_generated); @@ -90,7 +90,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, szind_t szind = sz_size2index(usize); - bool deferred_work_generated; + bool deferred_work_generated = false; bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, szind, zero, &deferred_work_generated); @@ -249,7 +249,7 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - bool deferred_work_generated; + bool deferred_work_generated = false; pa_dalloc(tsdn, &arena->pa_shard, edata, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); diff --git a/src/pa.c b/src/pa.c index 649b9c2..779e672 100644 --- a/src/pa.c +++ b/src/pa.c @@ -126,7 +126,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; - *deferred_work_generated = false; if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero, /* guarded */ false, deferred_work_generated); diff --git a/src/pac.c b/src/pac.c index 8ce3159..176b181 100644 --- a/src/pac.c +++ b/src/pac.c @@ -157,8 +157,6 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool *deferred_work_generated) { - *deferred_work_generated = false; - pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); @@ -179,8 +177,6 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); - *deferred_work_generated = false; - size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -221,8 +217,6 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; - *deferred_work_generated = false; - if (ehooks_split_will_fail(ehooks)) { return true; } diff --git a/src/sec.c b/src/sec.c index 0f95a0d..d99c443 100644 --- a/src/sec.c +++ b/src/sec.c @@ -148,7 +148,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { } malloc_mutex_unlock(tsdn, &shard->mtx); - bool deferred_work_generated; + bool deferred_work_generated = false; pai_dalloc_batch(tsdn, sec->fallback, &to_flush, &deferred_work_generated); } @@ -178,7 +178,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); - bool deferred_work_generated; + bool deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); @@ -223,7 +223,6 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, assert(!guarded); sec_t *sec = (sec_t *)self; - *deferred_work_generated = false; if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > 
sec->opts.max_alloc) { @@ -291,7 +290,7 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * we're disabling the HPA or resetting the arena, both of which are * rare pathways. */ - bool deferred_work_generated; + bool deferred_work_generated = false; pai_dalloc_batch(tsdn, sec->fallback, &to_flush, &deferred_work_generated); } @@ -341,7 +340,6 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { - *deferred_work_generated = false; sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 060ce3e..bda0d46 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -79,7 +79,7 @@ TEST_BEGIN(test_alloc_max) { edata_t *edata; /* Small max */ - bool deferred_work_generated; + bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); @@ -169,7 +169,7 @@ TEST_BEGIN(test_stress) { mem_tree_t tree; mem_tree_new(&tree); - bool deferred_work_generated; + bool deferred_work_generated = false; for (size_t i = 0; i < 100 * 1000; i++) { size_t operation = prng_range_zu(&prng_state, 2); @@ -252,7 +252,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - bool deferred_work_generated; + bool deferred_work_generated = false; enum {NALLOCS = 8}; @@ -369,7 +369,7 @@ TEST_BEGIN(test_defer_time) { hpa_shard_t *shard = create_test_data(&hooks, &opts); - bool deferred_work_generated; + bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); diff --git a/test/unit/pa.c b/test/unit/pa.c index 10fa1b2..505b6fa 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -88,7 +88,7 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, /* guarded */ false, &deferred_work_generated); diff --git a/test/unit/sec.c b/test/unit/sec.c index acca192..8ac3411 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -54,7 +54,6 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, bool *deferred_work_generated) { assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; if (ta->alloc_fail) { return NULL; } @@ -76,7 +75,6 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; if (ta->alloc_fail) { return 0; } @@ -100,7 +98,6 @@ pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; ta->expand_count++; return ta->expand_return_value; } @@ -109,7 +106,6 @@ static bool pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - 
*deferred_work_generated = false; ta->shrink_count++; return ta->shrink_return_value; } @@ -118,7 +114,6 @@ static void pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; ta->dalloc_count++; free(edata); } @@ -127,7 +122,6 @@ static void pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { @@ -179,7 +173,7 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { @@ -256,7 +250,7 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -312,7 +306,7 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -380,7 +374,7 @@ TEST_BEGIN(test_max_alloc_respected) { size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, /* max_bytes */ 1000 * PAGE); @@ -414,7 +408,7 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); @@ -458,7 +452,7 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, &deferred_work_generated); @@ -495,7 +489,7 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); @@ -544,7 +538,7 @@ TEST_BEGIN(test_stats_auto_flush) { edata_t *extra_alloc1; edata_t *allocs[2 * FLUSH_PAGES]; - bool deferred_work_generated; + bool deferred_work_generated = false; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, &deferred_work_generated); @@ -590,7 +584,7 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, -- cgit v0.12 From 8daac7958f6b9a3e10e5de83c2a1252e8977687f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 22 Sep 2021 14:59:53 -0700 Subject: Redefine functions with test hooks only for tests Android build has issues with these defines, this will allow the build to succeed if it doesn't need to build the tests. --- include/jemalloc/internal/test_hooks.h | 23 ++++++++++++++--------- src/prof_sys.c | 2 +- test/unit/test_hooks.c | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index a6351e5..3d530b5 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -4,16 +4,21 @@ extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); -#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) +#if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) +# define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) -#define open JEMALLOC_HOOK(open, test_hooks_libc_hook) -#define read JEMALLOC_HOOK(read, test_hooks_libc_hook) -#define write JEMALLOC_HOOK(write, test_hooks_libc_hook) -#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook) -#define close JEMALLOC_HOOK(close, test_hooks_libc_hook) -#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook) -#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook) +# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) +# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) +# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) +# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) +# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) +# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) +# define secure_getenv JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) /* Note that this is undef'd and re-define'd in src/prof.c. 
*/ -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#else +# define JEMALLOC_TEST_HOOK(fn, hook) fn +#endif + #endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/src/prof_sys.c b/src/prof_sys.c index fd41e86..b7a3a2c 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -20,7 +20,7 @@ */ #undef _Unwind_Backtrace #include -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif /******************************************************************************/ diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 2a5b3d5..8cd2b3b 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -12,7 +12,7 @@ func_to_hook(int arg1, int arg2) { return arg1 + arg2; } -#define func_to_hook JEMALLOC_HOOK(func_to_hook, test_hooks_libc_hook) +#define func_to_hook JEMALLOC_TEST_HOOK(func_to_hook, test_hooks_libc_hook) TEST_BEGIN(unhooked_call) { test_hooks_libc_hook = NULL; -- cgit v0.12 From 2159615419a90b5473cfd9d3a4cb4700259d8c0b Mon Sep 17 00:00:00 2001 From: Wang JinLong Date: Mon, 18 Oct 2021 09:57:27 +0800 Subject: Add new architecture loongarch. Signed-off-by: Wang JinLong --- include/jemalloc/internal/quantum.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 760d6ad..c22d753 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -30,6 +30,9 @@ # ifdef __hppa__ # define LG_QUANTUM 4 # endif +# ifdef __loongarch__ +# define LG_QUANTUM 4 +# endif # ifdef __m68k__ # define LG_QUANTUM 3 # endif -- cgit v0.12 From 26f5257b88c925357bc524444a61049905e7bd19 Mon Sep 17 00:00:00 2001 From: Ashutosh Grewal Date: Fri, 15 Oct 2021 19:23:31 -0700 Subject: Remove declaration of an undefined function --- include/jemalloc/internal/pac.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index d07ccc2..5eee3de 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -121,8 +121,6 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); -void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, - pac_estats_t *estats_out, size_t *resident); static inline size_t pac_mapped(pac_t *pac) { -- cgit v0.12 From 4d56aaeca5883ae5f4b5550c528503fb51fdf479 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Oct 2021 17:14:08 -0700 Subject: Optimize away the tsd_fast() check on free fastpath. To ensure that the free fastpath can tolerate uninitialized tsd, improved the static initializer for rtree_ctx in tsd. 
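The trick in this change: instead of branching on whether TSD is fully initialized, the rtree cache embedded in TSD gets a static initializer that poisons every cache key with a sentinel no real key can equal, so a free() arriving before TSD initialization simply takes the key-mismatch branch the fast path executes anyway and falls back to the slow path. A simplified sketch of the pattern (toy types, sizes, and hash; not the actual rtree code):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define KEY_INVALID     ((uintptr_t)1)  /* real keys are page-aligned, never 1 */
#define NCACHE          16

typedef struct {
        uintptr_t key;
        void *leaf;
} cache_elm_t;

#define ELM_INVALID     {KEY_INVALID, NULL}
#define ELM_4           ELM_INVALID, ELM_INVALID, ELM_INVALID, ELM_INVALID

/* Static initializer: every slot starts out as a guaranteed miss. */
static __thread cache_elm_t cache[NCACHE] = {ELM_4, ELM_4, ELM_4, ELM_4};

/* Returns true on a hit; a not-yet-initialized thread simply misses. */
static inline bool
cache_lookup_fast(uintptr_t key, void **leaf_out) {
        size_t slot = (key >> 12) & (NCACHE - 1);
        if (cache[slot].key != key) {
                return false;   /* covers both a cold cache and pre-init TSD */
        }
        *leaf_out = cache[slot].leaf;
        return true;
}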
--- include/jemalloc/internal/emap.h | 6 ++++-- include/jemalloc/internal/rtree.h | 3 --- include/jemalloc/internal/rtree_tsd.h | 24 ++++++++++++++++++------ include/jemalloc/internal/thread_event.h | 15 ++++----------- include/jemalloc/internal/tsd.h | 2 +- src/jemalloc.c | 27 +++++++++++++-------------- test/unit/tsd.c | 7 +++++++ 7 files changed, 47 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index a40b504..87ece63 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -276,12 +276,14 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, } /* - * Returns true on error. + * Only used on the fastpath of free. Returns true when cannot be fulfilled by + * fast path, e.g. when the metadata key is not cached. */ JEMALLOC_ALWAYS_INLINE bool emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + /* Use the unsafe getter since this may gets called during exit. */ + rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd); rtree_metadata_t metadata; bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index c5f0d8c..b4f4484 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -35,9 +35,6 @@ # define RTREE_LEAF_COMPACT #endif -/* Needed for initialization only. */ -#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) - typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 562e292..e45525c 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -18,16 +18,28 @@ * cache misses if made overly large, plus the cost of linear search in the LRU * cache. */ -#define RTREE_CTX_LG_NCACHE 4 -#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE 16 #define RTREE_CTX_NCACHE_L2 8 +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL} + +#define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID +#define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1 +#define RTREE_CTX_INIT_ELM_4 RTREE_CTX_INIT_ELM_2, RTREE_CTX_INIT_ELM_2 +#define RTREE_CTX_INIT_ELM_8 RTREE_CTX_INIT_ELM_4, RTREE_CTX_INIT_ELM_4 +#define RTREE_CTX_INIT_ELM_16 RTREE_CTX_INIT_ELM_8, RTREE_CTX_INIT_ELM_8 + +#define _RTREE_CTX_INIT_ELM_DATA(n) RTREE_CTX_INIT_ELM_##n +#define RTREE_CTX_INIT_ELM_DATA(n) _RTREE_CTX_INIT_ELM_DATA(n) + /* - * Zero initializer required for tsd initialization only. Proper initialization - * done via rtree_ctx_data_init(). + * Static initializer (to invalidate the cache entries) is required because the + * free fastpath may access the rtree cache before a full tsd initialization. 
*/ -#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}} - +#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \ + {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 525019b..2f4e1b3 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -118,17 +118,10 @@ te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) { } JEMALLOC_ALWAYS_INLINE void -te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold, - bool size_hint) { - if (!size_hint) { - *deallocated = tsd_thread_deallocated_get(tsd); - *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); - } else { - /* Unsafe getters since this may happen before tsd_init. */ - *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); - *threshold = - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); - } +te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold) { + /* Unsafe getters since this may happen before tsd_init. */ + *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); + *threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 86d5277..0a46d44 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -119,7 +119,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* activity_callback_thunk */ \ ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, + /* rtree_ctx */ RTREE_CTX_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 469a491..0c798c8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2910,12 +2910,20 @@ free_default(void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* + * The tsd_fast() / initialized checks are folded into the branch + * testing (deallocated_after >= threshold) later in this function. + * The threshold will be set to 0 when !tsd_fast. + */ + assert(tsd_fast(tsd) || + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); emap_alloc_ctx_t alloc_ctx; if (!size_hint) { - if (unlikely(tsd == NULL || !tsd_fast(tsd))) { - return false; - } bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); @@ -2926,15 +2934,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { assert(alloc_ctx.szind != SC_NSIZES); } else { /* - * The size hinted fastpath does not involve rtree lookup, thus - * can tolerate an uninitialized tsd. This allows the tsd_fast - * check to be folded into the branch testing fast_threshold - * (set to 0 when !tsd_fast). - */ - if (unlikely(tsd == NULL)) { - return false; - } - /* * Check for both sizes that are too large, and for sampled * objects. Sampled objects are always page-aligned. The * sampled object check will also check for null ptr. 
@@ -2949,7 +2948,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); + te_free_fastpath_ctx(tsd, &deallocated, &threshold); size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; @@ -2963,7 +2962,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(deallocated_after >= threshold)) { return false; } - + assert(tsd_fast(tsd)); bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); if (fail) { /* See the comment in isfree. */ diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 3f3ca73..205d870 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -48,6 +48,13 @@ thd_start(void *arg) { int d = (int)(uintptr_t)arg; void *p; + /* + * Test free before tsd init -- the free fast path (which does not + * explicitly check for NULL) has to tolerate this case, and fall back + * to free_default. + */ + free(NULL); + tsd_t *tsd = tsd_fetch(); expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); -- cgit v0.12 From b6a7a535b32a3298db5b3518bc1f52fccc1597a6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 20 Oct 2021 14:17:57 -0700 Subject: Optimize away a branch on the free fastpath. On the rtree metadata lookup fast path, there will never be a NULL returned when the cache key matches (which is unknown to the compiler). The previous logic was checking for NULL return value, resulting in the extra branch (in addition to the cache key match checking). Make the lookup_fast return a bool to indicate cache miss / match, so that the extra branch is avoided. --- include/jemalloc/internal/rtree.h | 43 ++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index b4f4484..a00adb2 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -330,28 +330,27 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, } /* - * Tries to look up the key in the L1 cache, returning it if there's a hit, or - * NULL if there's a miss. - * Key is allowed to be NULL; returns NULL in this case. + * Tries to look up the key in the L1 cache, returning false if there's a hit, or + * true if there's a miss. + * Key is allowed to be NULL; returns true in this case. */ -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { - rtree_leaf_elm_t *elm; - + uintptr_t key, rtree_leaf_elm_t **elm) { size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); - if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { - rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; - assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); - elm = &leaf[subkey]; - return elm; - } else { - return NULL; + if (unlikely(rtree_ctx->cache[slot].leafkey != leafkey)) { + return true; } + + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + *elm = &leaf[subkey]; + + return false; } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -449,16 +448,22 @@ rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } /* - * Returns true on error. 
+ * Returns true when the request cannot be fulfilled by fastpath. */ static inline bool rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_metadata_t *r_rtree_metadata) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, - key); - if (elm == NULL) { + rtree_leaf_elm_t *elm; + /* + * Should check the bool return value (lookup success or not) instead of + * elm == NULL (which will result in an extra branch). This is because + * when the cache lookup succeeds, there will never be a NULL pointer + * returned (which is unknown to the compiler). + */ + if (rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key, &elm)) { return true; } + assert(elm != NULL); *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, /* dependent */ true).metadata; return false; -- cgit v0.12 From 6cb585b13ad196ca2e4588ce984c269f3fdb4cea Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 2 Nov 2021 15:56:36 -0700 Subject: San: Unguard guarded slabs during arena destruction When opt_retain is on, slab extents remain guarded in all states, even retained. This works well if arena is never destroyed, because we anticipate those slabs will be eventually reused. But if the arena is destroyed, the slabs must be unguarded to prevent leaking guard pages. --- include/jemalloc/internal/guard.h | 9 ++++++++- src/extent.c | 8 +++++--- src/guard.c | 35 +++++++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/guard.h index 31f98c5..8e57816 100644 --- a/include/jemalloc/internal/guard.h +++ b/include/jemalloc/internal/guard.h @@ -14,7 +14,14 @@ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); -void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); +/* + * Unguard the extent, but don't modify emap boundaries. Must be called on an + * extent that has been erased from emap and shouldn't be placed back. + */ +void unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); void tsd_san_init(tsd_t *tsd); static inline bool diff --git a/src/extent.c b/src/extent.c index 84ecd6b..a79e1c7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1057,12 +1057,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); + assert(edata_state_get(edata) == extent_state_retained); + assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, pac, edata); - + if (edata_guarded_get(edata)) { + unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); + } edata_addr_set(edata, edata_base_get(edata)); /* Try to destroy; silently fail otherwise. */ diff --git a/src/guard.c b/src/guard.c index 0723219..4dadc97 100644 --- a/src/guard.c +++ b/src/guard.c @@ -32,10 +32,16 @@ guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { /* The new boundary will be registered on the pa_alloc path. 
*/ } -void -unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { +static void +unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool reg_emap) { /* Remove the inner boundary which no longer exists. */ - emap_deregister_boundary(tsdn, emap, edata); + if (reg_emap) { + assert(edata_state_get(edata) == extent_state_active); + emap_deregister_boundary(tsdn, emap, edata); + } else { + assert(edata_state_get(edata) == extent_state_retained); + } size_t size = edata_size_get(edata); size_t size_with_guards = size + PAGE_GUARDS_SIZE; @@ -44,7 +50,6 @@ unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { uintptr_t guard1 = addr - PAGE; uintptr_t guard2 = addr + size; - assert(edata_state_get(edata) == extent_state_active); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); /* Update the true addr and usable size of the edata. */ @@ -52,8 +57,26 @@ unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { edata_addr_set(edata, (void *)guard1); edata_guarded_set(edata, false); - /* Then re-register the outer boundary including the guards. */ - emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); + /* + * Then re-register the outer boundary including the guards, if + * requested. + */ + if (reg_emap) { + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, + /* slab */ false); + } +} + +void +unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); +} + +void +unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + emap_assert_not_mapped(tsdn, emap, edata); + unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); } void -- cgit v0.12 From 37342a4d32797fdc029dde296cbef618c849608b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Nov 2021 16:39:06 -0700 Subject: Add ctl interface for experimental_infallible_new. --- src/ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 491a333..eccb958 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -123,6 +123,7 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) +CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -439,6 +440,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("experimental_infallible_new"), + CTL(opt_experimental_infallible_new)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), @@ -2161,6 +2164,8 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, + opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, -- cgit v0.12 From 6bdb4f5ab0358d0b4c53b2d18ec9422526042413 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Nov 2021 20:35:37 -0800 Subject: Check prof_active in addtion to opt_prof during batch_alloc(). 
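
For context: opt_prof only records whether profiling was enabled at startup, while sampling can still be toggled at runtime through the "prof.active" mallctl, so the batch allocation path has to consult prof_active as well before doing the sample-event lookahead. A minimal sketch of that runtime toggle, assuming a build configured with --enable-prof and the default (unprefixed) mallctl name:

	#include <stdbool.h>
	#include <jemalloc/jemalloc.h>

	static void
	disable_sampling(void) {
		bool active = false;
		/* Turn sampling off at runtime; opt_prof remains true. */
		mallctl("prof.active", NULL, NULL, &active, sizeof(active));
	}
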
--- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0c798c8..a9d7c16 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4000,6 +4000,7 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { size_t batch = num - filled; size_t surplus = SIZE_MAX; /* Dead store. */ bool prof_sample_event = config_prof && opt_prof + && prof_active_get_unlocked() && te_prof_sample_event_lookahead_surplus(tsd, batch * usize, &surplus); -- cgit v0.12 From 8b81d3f214cc9ef86210d731803fe39f2f3d54d9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Nov 2021 15:30:54 -0800 Subject: Fix the initialization of last_event in thread event init. The event counters maintain a relationship with the current bytes: last_event <= current < next_event. When a reinit happens (e.g. reincarnated tsd), the last event needs progressing because all events start fresh from the current bytes. --- src/thread_event.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/thread_event.c b/src/thread_event.c index bb91baa..37eb582 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -221,7 +221,13 @@ te_recompute_fast_threshold(tsd_t *tsd) { static void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { + /* + * The next threshold based on future events can only be adjusted after + * progressing the last_event counter (which is set to current). + */ + assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx)); assert(wait <= TE_MAX_START_WAIT); + uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); @@ -298,6 +304,19 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { static void te_init(tsd_t *tsd, bool is_alloc) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); + /* + * Reset the last event to current, which starts the events from a clean + * state. This is necessary when re-init the tsd event counters. + * + * The event counters maintain a relationship with the current bytes: + * last_event <= current < next_event. When a reinit happens (e.g. + * reincarnated tsd), the last event needs progressing because all + * events start fresh from the current bytes. + */ + te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx)); + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ @@ -311,8 +330,6 @@ te_init(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, is_alloc); te_adjust_thresholds_helper(tsd, &ctx, wait); } -- cgit v0.12 From 400c59895a744068994025cf33f80b56bc960a35 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Nov 2021 12:40:34 -0800 Subject: Fix uninitialized nstime reading / updating on the stack in hpa. In order for nstime_update to handle non-monotonic clocks, it requires the input nstime to be initialized -- when reading for the first time, zero init has to be done. Otherwise random stack value may be seen as clocks and returned. 
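
The intended calling pattern, as a minimal sketch (names are the internal nstime API touched by the diff below, not a public interface):

	nstime_t now;               /* stack memory, contents undefined */
	nstime_init_zero(&now);     /* required before the first reading */
	nstime_update(&now);        /* compares against 0, not garbage */

Without the zero init, nstime_update() compares the fresh clock reading against whatever happens to be on the stack and, to preserve monotonicity, may keep that garbage value as the "current" time.
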
--- include/jemalloc/internal/hpa_hooks.h | 2 +- src/hpa.c | 13 ++++++++----- src/hpa_hooks.c | 7 +++++-- test/unit/hpa.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 3e21d85..12e6b97 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -8,7 +8,7 @@ struct hpa_hooks_s { void (*purge)(void *ptr, size_t size); void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); - void (*curtime)(nstime_t *r_time); + void (*curtime)(nstime_t *r_time, bool first_reading); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index 5251655..b2628db 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -347,7 +347,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; - shard->central->hooks.curtime(&now); + shard->central->hooks.curtime(&now, /* first_reading */ true); hpdata_allow_hugify(ps, now); } /* @@ -437,7 +437,8 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; - shard->central->hooks.curtime(&shard->last_purge); + shard->central->hooks.curtime(&shard->last_purge, + /* first_reading */ false); if (dehugify) { shard->stats.ndehugifies++; } @@ -477,7 +478,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. */ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, /* first_reading */ true); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t millis = nstime_msec(&nstime); if (millis < shard->opts.hugify_delay_ms) { @@ -895,7 +896,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, + /* first_reading */ true); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); /* @@ -921,7 +923,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return BACKGROUND_THREAD_DEFERRED_MIN; } nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, + /* first_reading */ true); nstime_subtract(&nstime, &shard->last_purge); uint64_t since_last_purge_ms = nstime_msec(&nstime); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 6f37761..116592f 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,7 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); -static void hpa_hooks_curtime(nstime_t *r_nstime); +static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -48,6 +48,9 @@ hpa_hooks_dehugify(void *ptr, size_t size) { } static void -hpa_hooks_curtime(nstime_t *r_nstime) { +hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { + if (first_reading) { + nstime_init_zero(r_nstime); + } nstime_update(r_nstime); } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index bda0d46..a63d51d 100644 --- a/test/unit/hpa.c +++ 
b/test/unit/hpa.c @@ -349,7 +349,7 @@ defer_test_dehugify(void *ptr, size_t size) { static nstime_t defer_curtime; static void -defer_test_curtime(nstime_t *r_time) { +defer_test_curtime(nstime_t *r_time, bool first_reading) { *r_time = defer_curtime; } -- cgit v0.12 From cdabe908d05ba68da248edf1dd9f522af1ec6024 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Nov 2021 14:51:07 -0800 Subject: Track the initialized state of nstime_t on debug build. Some nstime_t operations require and assume the input nstime is initialized (e.g. nstime_update) -- uninitialized input may cause silent failures which is difficult to reproduce / debug. Add an explicit flag to track the state (limited to debug build only). Also fixed an use case in hpa (time of last_purge). --- include/jemalloc/internal/nstime.h | 11 ++++++- src/hpa.c | 1 + src/nstime.c | 62 +++++++++++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 76e4351..e8315db 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -3,10 +3,19 @@ /* Maximum supported number of seconds (~584 years). */ #define NSTIME_SEC_MAX KQU(18446744072) -#define NSTIME_ZERO_INITIALIZER {0} + +#define NSTIME_MAGIC ((uint32_t)0xb8a9ce37) +#ifdef JEMALLOC_DEBUG +# define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC} +#else +# define NSTIME_ZERO_INITIALIZER {0} +#endif typedef struct { uint64_t ns; +#ifdef JEMALLOC_DEBUG + uint32_t magic; /* Tracks if initialized. */ +#endif } nstime_t; static const nstime_t zero = NSTIME_ZERO_INITIALIZER; diff --git a/src/hpa.c b/src/hpa.c index b2628db..caf122b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -203,6 +203,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->opts = *opts; shard->npending_purge = 0; + nstime_init_zero(&shard->last_purge); shard->stats.npurge_passes = 0; shard->stats.npurges = 0; diff --git a/src/nstime.c b/src/nstime.c index 184aa4c..44419d2 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -8,93 +8,153 @@ #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) +static void +nstime_set_initialized(nstime_t *time) { +#ifdef JEMALLOC_DEBUG + time->magic = NSTIME_MAGIC; +#endif +} + +static void +nstime_assert_initialized(const nstime_t *time) { +#ifdef JEMALLOC_DEBUG + /* + * Some parts (e.g. stats) rely on memset to zero initialize. Treat + * these as valid initialization. + */ + assert(time->magic == NSTIME_MAGIC || + (time->magic == 0 && time->ns == 0)); +#endif +} + +static void +nstime_pair_assert_initialized(const nstime_t *t1, const nstime_t *t2) { + nstime_assert_initialized(t1); + nstime_assert_initialized(t2); +} + +static void +nstime_initialize_operand(nstime_t *time) { + /* + * Operations like nstime_add may have the initial operand being zero + * initialized (covered by the assert below). Full-initialize needed + * before changing it to non-zero. 
+ */ + nstime_assert_initialized(time); + nstime_set_initialized(time); +} + void nstime_init(nstime_t *time, uint64_t ns) { + nstime_set_initialized(time); time->ns = ns; } void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) { + nstime_set_initialized(time); time->ns = sec * BILLION + nsec; } uint64_t nstime_ns(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns; } uint64_t nstime_msec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns / MILLION; } uint64_t nstime_sec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns / BILLION; } uint64_t nstime_nsec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns % BILLION; } void nstime_copy(nstime_t *time, const nstime_t *source) { + /* Source is required to be initialized. */ + nstime_assert_initialized(source); *time = *source; + nstime_assert_initialized(time); } int nstime_compare(const nstime_t *a, const nstime_t *b) { + nstime_pair_assert_initialized(a, b); return (a->ns > b->ns) - (a->ns < b->ns); } void nstime_add(nstime_t *time, const nstime_t *addend) { + nstime_pair_assert_initialized(time, addend); assert(UINT64_MAX - time->ns >= addend->ns); + nstime_initialize_operand(time); time->ns += addend->ns; } void nstime_iadd(nstime_t *time, uint64_t addend) { + nstime_assert_initialized(time); assert(UINT64_MAX - time->ns >= addend); + nstime_initialize_operand(time); time->ns += addend; } void nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { + nstime_pair_assert_initialized(time, subtrahend); assert(nstime_compare(time, subtrahend) >= 0); + /* No initialize operand -- subtraction must be initialized. */ time->ns -= subtrahend->ns; } void nstime_isubtract(nstime_t *time, uint64_t subtrahend) { + nstime_assert_initialized(time); assert(time->ns >= subtrahend); + /* No initialize operand -- subtraction must be initialized. */ time->ns -= subtrahend; } void nstime_imultiply(nstime_t *time, uint64_t multiplier) { + nstime_assert_initialized(time); assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + nstime_initialize_operand(time); time->ns *= multiplier; } void nstime_idivide(nstime_t *time, uint64_t divisor) { + nstime_assert_initialized(time); assert(divisor != 0); + nstime_initialize_operand(time); time->ns /= divisor; } uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor) { + nstime_pair_assert_initialized(time, divisor); assert(divisor->ns != 0); + /* No initialize operand -- *time itself remains unchanged. */ return time->ns / divisor->ns; } @@ -192,7 +252,7 @@ nstime_update_impl(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); - nstime_get(time); + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { -- cgit v0.12 From 3b3257a7092f447fa6c9a3a7305cb346dfb37841 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 22 Nov 2021 18:42:05 -0800 Subject: Correct opt.prof_leak documentation The option has been misleading, because it stays disabled unless prof_final is also specified. In practice it's impossible to detect that the option is silently disabled, because it just doesn't provide any output as if there are no memory leaks detected. 
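
To illustrate the combination the updated text describes, a configuration that actually produces leak reports at exit might look like this (assuming a build with --enable-prof; option spellings as in the manual):

	MALLOC_CONF="prof:true,prof_final:true,prof_leak:true" ./a.out

Without prof_final, the atexit(3) dump that would report the leaks never runs, so the output is indistinguishable from a leak-free run.
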
--- doc/jemalloc.xml.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b8b96ab..cba0b3f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1547,8 +1547,10 @@ malloc_conf = "xmalloc:true";]]> 3 function to report memory leaks detected by allocation sampling. See the opt.prof option for - information on analyzing heap profile output. This option is disabled - by default. + information on analyzing heap profile output. Works only when combined + with opt.prof_final + , otherwise does nothing. This option is disabled by default. + -- cgit v0.12 From 113e8e68e1932065125acf66fa087a2e6e11b509 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Thu, 2 Dec 2021 16:40:05 +0000 Subject: freebsd 14 build fix proposal. seems to have introduced finally more linux api cpu affinity (sched_* family) compatibility detected at configure time thus adjusting accordingly. --- configure.ac | 1 + include/jemalloc/internal/jemalloc_internal_decls.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 7a49e84..22900ec 100644 --- a/configure.ac +++ b/configure.ac @@ -652,6 +652,7 @@ case "${host}" in SYM_PREFIX="_" ;; *-*-freebsd*) + JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) force_lazy_lock="1" diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7d212c4..983027c 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -34,6 +34,10 @@ # include # if defined(__FreeBSD__) || defined(__DragonFly__) # include +# include +# if defined(__FreeBSD__) +# define cpu_set_t cpuset_t +# endif # endif # include # ifdef JEMALLOC_OS_UNFAIR_LOCK -- cgit v0.12 From af6ee27c0d6a87d0274b9e83a55f78176ab95da4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 Dec 2021 12:06:16 -0800 Subject: Enforce abort_conf:true when malloc_conf is not fully recognized. Ensures the malloc_conf "ends with key", "ends with comma" and "malform conf string" cases abort under abort_conf:true. --- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index a9d7c16..a7d43dc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -845,10 +845,12 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, if (opts != *opts_p) { malloc_write(": Conf string ends " "with key\n"); + had_conf_error = true; } return true; default: malloc_write(": Malformed conf string\n"); + had_conf_error = true; return true; } } @@ -867,6 +869,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, if (*opts == '\0') { malloc_write(": Conf string ends " "with comma\n"); + had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; accept = true; -- cgit v0.12 From 7dcf77809c9886e3892e29954d90b838af1292c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Nov 2021 15:58:03 -0800 Subject: Mark slab as true on sized dealloc fast path. For sized dealloc, fastpath only handles lookup-able sizes, which must be slabs. 
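
As a cross-check of the new compile-time assertion added below, under the common configuration of LG_PAGE == 12 and SC_LG_NGROUP == 2 (an assumption; other page sizes shift these numbers):

	SC_LOOKUP_MAXCLASS = 1 << 12                = 4096
	SC_SMALL_MAX_BASE  = 1 << (12 + 2 - 1)      = 8192
	SC_SMALL_MAX_DELTA = 1 << (12 - 1)          = 2048
	SC_SMALL_MAXCLASS  = 8192 + (4 - 1) * 2048  = 14336

so every size reachable through the lookup table (<= 4096 bytes) is well below SC_SMALL_MAXCLASS, i.e. always a small, slab-backed size, and setting alloc_ctx.slab to true on this path is safe.
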
--- include/jemalloc/internal/sc.h | 11 ++++++++--- src/jemalloc.c | 10 +++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 031ffff..8efd324 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -248,16 +248,21 @@ /* The largest size class in the lookup table, and its binary log. */ #define SC_LG_MAX_LOOKUP 12 -#define SC_LOOKUP_MAXCLASS ((size_t)1 << SC_LG_MAX_LOOKUP) +#define SC_LOOKUP_MAXCLASS (1 << SC_LG_MAX_LOOKUP) /* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ -#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) -#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1)) +#define SC_SMALL_MAX_BASE (1 << (LG_PAGE + SC_LG_NGROUP - 1)) +#define SC_SMALL_MAX_DELTA (1 << (LG_PAGE - 1)) /* The largest size class allocated out of a slab. */ #define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) +/* The fastpath assumes all lookup-able sizes are small. */ +#if (SC_SMALL_MAXCLASS < SC_LOOKUP_MAXCLASS) +# error "Lookup table sizes must be small" +#endif + /* The smallest size class not allocated out of a slab. */ #define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)) #define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP) diff --git a/src/jemalloc.c b/src/jemalloc.c index a7d43dc..521f4ea 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2946,9 +2946,17 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + alloc_ctx.slab = true; } + /* + * Currently the fastpath only handles small sizes. The branch on + * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking + * tcache szind upper limit (i.e. tcache_maxclass) as well. + */ + assert(alloc_ctx.slab); uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold); -- cgit v0.12 From d9bbf539ff9cee5f138e03ad2e7f61263d381c7f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 13 Oct 2021 12:35:52 -0700 Subject: CI: Refactor gen_travis.py The CI consolidation project adds more operating systems to Travis. This refactoring is aimed to decouple the configuration of each individual OS from the actual job matrix generation and formatting. Otherwise, format_job function would turn into a huge collection of ad-hoc conditions. --- .travis.yml | 127 ++++++++++----------- scripts/gen_travis.py | 298 +++++++++++++++++++++++++++++++------------------- 2 files changed, 254 insertions(+), 171 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5cf0e08..ecc13f4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,17 +1,15 @@ + +# This config file is generated by ./scripts/gen_travis.py. +# Do not edit by hand. 
+ language: generic dist: focal -matrix: +jobs: include: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" @@ -53,57 +51,6 @@ matrix: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 addons: *gcc_multilib @@ -313,18 +260,76 @@ matrix: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # --enable-expermental-smallocx: - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: - autoconf - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS + # If COMPILER_FLAGS are not empty, add them to CC and CXX + - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS - make -j3 - make -j3 tests diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fe9d840..e98ebeb 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -1,22 +1,35 @@ #!/usr/bin/env python3 -from itertools import combinations +from itertools import combinations, chain +from enum import Enum, auto + + +LINUX = 'linux' +OSX = 'osx' + + +AMD64 = 'amd64' +ARM64 = 'arm64' +PPC64LE = 'ppc64le' + + +TRAVIS_TEMPLATE = """ +# This config file is generated by ./scripts/gen_travis.py. +# Do not edit by hand. 
-travis_template = """\ language: generic dist: focal -matrix: +jobs: include: -%s +{jobs} before_script: - autoconf - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - - ./configure ${COMPILER_FLAGS:+ \ - CC="$CC $COMPILER_FLAGS" \ - CXX="$CXX $COMPILER_FLAGS" } \ - $CONFIGURE_FLAGS + # If COMPILER_FLAGS are not empty, add them to CC and CXX + - ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ +CXX="$CXX $COMPILER_FLAGS"}} $CONFIGURE_FLAGS - make -j3 - make -j3 tests @@ -24,6 +37,39 @@ script: - make check """ + +class Option(object): + class Type: + COMPILER = auto() + COMPILER_FLAG = auto() + CONFIGURE_FLAG = auto() + MALLOC_CONF = auto() + + def __init__(self, type, value): + self.type = type + self.value = value + + @staticmethod + def as_compiler(value): + return Option(Option.Type.COMPILER, value) + + @staticmethod + def as_compiler_flag(value): + return Option(Option.Type.COMPILER_FLAG, value) + + @staticmethod + def as_configure_flag(value): + return Option(Option.Type.CONFIGURE_FLAG, value) + + @staticmethod + def as_malloc_conf(value): + return Option(Option.Type.MALLOC_CONF, value) + + def __eq__(self, obj): + return (isinstance(obj, Option) and obj.type == self.type + and obj.value == self.value) + + # The 'default' configuration is gcc, on linux, with no compiler or configure # flags. We also test with clang, -m32, --enable-debug, --enable-prof, # --disable-stats, and --with-malloc-conf=tcache:false. To avoid abusing @@ -32,84 +78,80 @@ script: # hope that bugs involving interactions of such settings are rare. MAX_UNUSUAL_OPTIONS = 2 -os_default = 'linux' -os_unusual = 'osx' -arch_default = 'amd64' -arch_unusual = 'ppc64le' +GCC = Option.as_compiler('CC=gcc CXX=g++') +CLANG = Option.as_compiler('CC=clang CXX=clang++') + + +compiler_default = GCC +compilers_unusual = [CLANG,] + -compilers_default = 'CC=gcc CXX=g++' -compilers_unusual = 'CC=clang CXX=clang++' +compiler_flag_unusuals = [Option.as_compiler_flag(opt) for opt in ('-m32',)] -compiler_flag_unusuals = ['-m32'] -configure_flag_unusuals = [ +configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( '--enable-debug', '--enable-prof', '--disable-stats', '--disable-libdl', '--enable-opt-safety-checks', '--with-lg-page=16', -] +)] -malloc_conf_unusuals = [ + +malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( 'tcache:false', 'dss:primary', 'percpu_arena:percpu', 'background_thread:true', -] +)] + -all_unusuals = ( - [os_unusual] + [arch_unusual] + [compilers_unusual] + compiler_flag_unusuals - + configure_flag_unusuals + malloc_conf_unusuals -) +all_unusuals = (compilers_unusual + compiler_flag_unusuals + + configure_flag_unusuals + malloc_conf_unusuals) -unusual_combinations_to_test = [] -for i in range(MAX_UNUSUAL_OPTIONS + 1): - unusual_combinations_to_test += combinations(all_unusuals, i) gcc_multilib_set = False -gcc_ppc_set = False + + +def get_extra_cflags(os, compiler): + # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if compiler == CLANG.value or os == OSX: + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == OSX: + extra_cflags += [ + '-Wno-deprecated-declarations', + ] + return extra_cflags + + # Formats a job from a combination of flags -def format_job(combination): +def format_job(os, arch, combination): global gcc_multilib_set - global gcc_ppc_set - - os = os_unusual if os_unusual in combination else os_default - compilers = compilers_unusual if compilers_unusual in combination else compilers_default - arch = arch_unusual if arch_unusual in combination else arch_default - compiler_flags = [x for x in combination if x in compiler_flag_unusuals] - configure_flags = [x for x in combination if x in configure_flag_unusuals] - malloc_conf = [x for x in combination if x in malloc_conf_unusuals] - - # Filter out unsupported configurations on OS X. - if os == 'osx' and ('dss:primary' in malloc_conf or \ - 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ - in malloc_conf): - return "" - # gcc is just a redirect to clang on OS X. No need to test both. - if os == 'osx' and compilers_unusual in combination: - return "" - if len(malloc_conf) > 0: - configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) - # Filter out an unsupported configuration - heap profiling on OS X. - if os == 'osx' and '--enable-prof' in configure_flags: - return "" + compiler = [x.value for x in combination if x.type == Option.Type.COMPILER] + assert(len(compiler) <= 1) + if not compiler: + compiler = compiler_default.value + else: + compiler = compiler[0] + compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] + configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] + malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] - # Filter out unsupported OSX configuration on PPC64LE - if arch == 'ppc64le' and ( - os == 'osx' - or '-m32' in combination - or compilers_unusual in combination - ): - return "" + if len(malloc_conf) > 0: + configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) job = "" - job += ' - os: %s\n' % os - job += ' arch: %s\n' % arch + job += ' - os: {}\n'.format(os) + job += ' arch: {}\n'.format(arch) - if '-m32' in combination and os == 'linux': + if '-m32' in compiler_flags and os == 'linux': job += ' addons:' if gcc_multilib_set: job += ' *gcc_multilib\n' @@ -121,63 +163,99 @@ def format_job(combination): job += ' - g++-multilib\n' gcc_multilib_set = True - # We get some spurious errors when -Warray-bounds is enabled. 
- extra_cflags = ['-Werror', '-Wno-array-bounds'] - if 'clang' in compilers or os == 'osx': - extra_cflags += [ - '-Wno-unknown-warning-option', - '-Wno-ignored-attributes' - ] - if os == 'osx': - extra_cflags += [ - '-Wno-deprecated-declarations', - ] env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' 'EXTRA_CFLAGS="{}"'.format( - compilers, ' '.join(compiler_flags), ' '.join(configure_flags), - ' '.join(extra_cflags))) + compiler, + ' '.join(compiler_flags), + ' '.join(configure_flags), + ' '.join(get_extra_cflags(os, compiler)))) - job += ' env: %s\n' % env_string + job += ' env: {}'.format(env_string) return job -include_rows = "" -for combination in unusual_combinations_to_test: - include_rows += format_job(combination) -# Development build -include_rows += '''\ +def generate_unusual_combinations(max_unusual_opts): + """ + Generates different combinations of non-standard compilers, compiler flags, + configure flags and malloc_conf settings. + + @param max_unusual_opts: Limit of unusual options per combination. + """ + return chain.from_iterable( + [combinations(all_unusuals, i) for i in range(max_unusual_opts + 1)]) + + +def included(combination, exclude): + """ + Checks if the combination of options should be included in the Travis + testing matrix. + """ + return not any(excluded in combination for excluded in exclude) + + +def generate_jobs(os, arch, exclude, max_unusual_opts): + jobs = [] + for combination in generate_unusual_combinations(max_unusual_opts): + if included(combination, exclude): + jobs.append(format_job(os, arch, combination)) + return '\n'.join(jobs) + + +def generate_linux(arch): + os = LINUX + + # Only generate 2 unusual options for AMD64 to reduce matrix size + max_unusual_opts = MAX_UNUSUAL_OPTIONS if arch == AMD64 else 1 + + exclude = [] + if arch == PPC64LE: + # Avoid 32 bit builds and clang on PowerPC + exclude = [Option.as_compiler_flag('-m32')] + compilers_unusual + + return generate_jobs(os, arch, exclude, max_unusual_opts) + + +def generate_macos(arch): + os = OSX + + max_unusual_opts = 1 + + exclude = ([Option.as_malloc_conf(opt) for opt in ( + 'dss:primary', + 'percpu_arena:percpu', + 'background_thread:true')] + + [Option.as_configure_flag('--enable-prof')] + + [CLANG,]) + + return generate_jobs(os, arch, exclude, max_unusual_opts) + + +def get_manual_jobs(): + return """\ # Development build - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" -''' - -# Enable-expermental-smallocx -include_rows += '''\ + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug \ +--disable-cache-oblivious --enable-stats --enable-log --enable-prof" \ +EXTRA_CFLAGS="-Werror -Wno-array-bounds" # --enable-expermental-smallocx: - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" -''' - -# Does not seem to be working on newer travis machines. Valgrind has long been a -# pain point; abandon it for now. 
-# Valgrind build bots -#include_rows += ''' -# # Valgrind -# - os: linux -# arch: amd64 -# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" -# addons: -# apt: -# packages: -# - valgrind -#''' - -# To enable valgrind on macosx add: -# -# - os: osx -# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" -# install: brew install valgrind -# -# It currently fails due to: https://github.com/jemalloc/jemalloc/issues/1274 - -print(travis_template % include_rows) + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug \ +--enable-experimental-smallocx --enable-stats --enable-prof" \ +EXTRA_CFLAGS="-Werror -Wno-array-bounds" +""" + + +def main(): + jobs = '\n'.join(( + generate_linux(AMD64), + generate_linux(PPC64LE), + + generate_macos(AMD64), + get_manual_jobs() + )) + + print(TRAVIS_TEMPLATE.format(jobs=jobs)) + + +if __name__ == '__main__': + main() -- cgit v0.12 From 62f9c54d2a9035c6bfdbb4c41ecc0dcb040b509e Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 22 Oct 2021 17:40:42 -0700 Subject: San: Rename 'guard' to 'san' This prepares the foundation for more sanitizer-related work in the future. --- Makefile.in | 4 +- include/jemalloc/internal/ecache.h | 2 +- include/jemalloc/internal/guard.h | 83 --------------- include/jemalloc/internal/san.h | 84 ++++++++++++++++ src/arena.c | 6 +- src/ecache.c | 2 +- src/extent.c | 4 +- src/guard.c | 86 ---------------- src/jemalloc.c | 2 +- src/pa.c | 2 +- src/pac.c | 6 +- src/san.c | 87 ++++++++++++++++ src/tsd.c | 2 +- test/include/test/guard.h | 6 -- test/include/test/san.h | 6 ++ test/unit/double_free.c | 2 +- test/unit/guard.c | 201 ------------------------------------- test/unit/guard.sh | 3 - test/unit/retained.c | 2 +- test/unit/san.c | 201 +++++++++++++++++++++++++++++++++++++ test/unit/san.sh | 3 + 21 files changed, 398 insertions(+), 396 deletions(-) delete mode 100644 include/jemalloc/internal/guard.h create mode 100644 include/jemalloc/internal/san.h delete mode 100644 src/guard.c create mode 100644 src/san.c delete mode 100644 test/include/test/guard.h create mode 100644 test/include/test/san.h delete mode 100644 test/unit/guard.c delete mode 100644 test/unit/guard.sh create mode 100644 test/unit/san.c create mode 100644 test/unit/san.sh diff --git a/Makefile.in b/Makefile.in index abd361f..8f96a99 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,7 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ - $(srcroot)src/guard.c \ + $(srcroot)src/san.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ @@ -219,7 +219,7 @@ TESTS_UNIT := \ ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ - ${srcroot}test/unit/guard.c \ + ${srcroot}test/unit/san.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index dd1bc32..71cae3e 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ECACHE_H #include "jemalloc/internal/eset.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" typedef struct ecache_s ecache_t; diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/guard.h deleted file mode 
100644 index 8e57816..0000000 --- a/include/jemalloc/internal/guard.h +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_GUARD_H -#define JEMALLOC_INTERNAL_GUARD_H - -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/emap.h" - -#define PAGE_GUARDS_SIZE (2 * PAGE) - -#define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 -#define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 - -/* 0 means disabled, i.e. never guarded. */ -extern size_t opt_san_guard_large; -extern size_t opt_san_guard_small; - -void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); -void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); -/* - * Unguard the extent, but don't modify emap boundaries. Must be called on an - * extent that has been erased from emap and shouldn't be placed back. - */ -void unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); -void tsd_san_init(tsd_t *tsd); - -static inline bool -san_enabled(void) { - return (opt_san_guard_large != 0 || opt_san_guard_small != 0); -} - -static inline bool -large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, - size_t alignment) { - if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { - return false; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - uint64_t n = tsd_san_extents_until_guard_large_get(tsd); - assert(n >= 1); - if (n > 1) { - /* - * Subtract conditionally because the guard may not happen due - * to alignment or size restriction below. - */ - *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; - } - - if (n == 1 && (alignment <= PAGE) && - (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { - *tsd_san_extents_until_guard_largep_get(tsd) = - opt_san_guard_large; - return true; - } else { - assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); - return false; - } -} - -static inline bool -slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { - if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { - return false; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - uint64_t n = tsd_san_extents_until_guard_small_get(tsd); - assert(n >= 1); - if (n == 1) { - *tsd_san_extents_until_guard_smallp_get(tsd) = - opt_san_guard_small; - return true; - } else { - *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; - assert(tsd_san_extents_until_guard_small_get(tsd) >= 1); - return false; - } -} - -#endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h new file mode 100644 index 0000000..b3d0304 --- /dev/null +++ b/include/jemalloc/internal/san.h @@ -0,0 +1,84 @@ +#ifndef JEMALLOC_INTERNAL_GUARD_H +#define JEMALLOC_INTERNAL_GUARD_H + +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" + +#define PAGE_GUARDS_SIZE (2 * PAGE) + +#define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 +#define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 + +/* 0 means disabled, i.e. never guarded. */ +extern size_t opt_san_guard_large; +extern size_t opt_san_guard_small; + +void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); +void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); +/* + * Unguard the extent, but don't modify emap boundaries. Must be called on an + * extent that has been erased from emap and shouldn't be placed back. 
+ */ +void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, emap_t *emap); +void tsd_san_init(tsd_t *tsd); + +static inline bool +san_enabled(void) { + return (opt_san_guard_large != 0 || opt_san_guard_small != 0); +} + +static inline bool +san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, + size_t alignment) { + if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_large_get(tsd); + assert(n >= 1); + if (n > 1) { + /* + * Subtract conditionally because the guard may not happen due + * to alignment or size restriction below. + */ + *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; + } + + if (n == 1 && (alignment <= PAGE) && + (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { + *tsd_san_extents_until_guard_largep_get(tsd) = + opt_san_guard_large; + return true; + } else { + assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); + return false; + } +} + +static inline bool +san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { + if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_small_get(tsd); + assert(n >= 1); + if (n == 1) { + *tsd_san_extents_until_guard_smallp_get(tsd) = + opt_san_guard_small; + return true; + } else { + *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; + assert(tsd_san_extents_until_guard_small_get(tsd) >= 1); + return false; + } +} + +#endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/src/arena.c b/src/arena.c index 811f0ed..19e4e85 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" @@ -328,7 +328,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; - bool guarded = large_extent_decide_guard(tsdn, arena_get_ehooks(arena), + bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); @@ -829,7 +829,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, guarded, &deferred_work_generated); diff --git a/src/ecache.c b/src/ecache.c index 26fc211..a242227 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, diff --git a/src/extent.c b/src/extent.c index a79e1c7..7112d3a 100644 --- 
a/src/extent.c +++ b/src/extent.c @@ -1013,7 +1013,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Restore guard pages for dalloc / unmap. */ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages(tsdn, ehooks, edata, pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1063,7 +1063,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { - unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); } edata_addr_set(edata, edata_base_get(edata)); diff --git a/src/guard.c b/src/guard.c deleted file mode 100644 index 4dadc97..0000000 --- a/src/guard.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/guard.h" -#include "jemalloc/internal/tsd.h" - -/* The sanitizer options. */ -size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; -size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; - -void -guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - emap_deregister_boundary(tsdn, emap, edata); - - size_t size_with_guards = edata_size_get(edata); - size_t usize = size_with_guards - PAGE_GUARDS_SIZE; - - uintptr_t guard1 = (uintptr_t)edata_base_get(edata); - uintptr_t addr = guard1 + PAGE; - uintptr_t guard2 = addr + usize; - - assert(edata_state_get(edata) == extent_state_active); - ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); - - /* Update the guarded addr and usable size of the edata. */ - edata_size_set(edata, usize); - edata_addr_set(edata, (void *)addr); - edata_guarded_set(edata, true); - - /* The new boundary will be registered on the pa_alloc path. */ -} - -static void -unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, - bool reg_emap) { - /* Remove the inner boundary which no longer exists. */ - if (reg_emap) { - assert(edata_state_get(edata) == extent_state_active); - emap_deregister_boundary(tsdn, emap, edata); - } else { - assert(edata_state_get(edata) == extent_state_retained); - } - - size_t size = edata_size_get(edata); - size_t size_with_guards = size + PAGE_GUARDS_SIZE; - - uintptr_t addr = (uintptr_t)edata_base_get(edata); - uintptr_t guard1 = addr - PAGE; - uintptr_t guard2 = addr + size; - - ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); - - /* Update the true addr and usable size of the edata. */ - edata_size_set(edata, size_with_guards); - edata_addr_set(edata, (void *)guard1); - edata_guarded_set(edata, false); - - /* - * Then re-register the outer boundary including the guards, if - * requested. 
- */ - if (reg_emap) { - emap_register_boundary(tsdn, emap, edata, SC_NSIZES, - /* slab */ false); - } -} - -void -unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); -} - -void -unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { - emap_assert_not_mapped(tsdn, emap, edata); - unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); -} - -void -tsd_san_init(tsd_t *tsd) { - *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; - *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 521f4ea..e707f9f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,7 +10,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" diff --git a/src/pa.c b/src/pa.c index 779e672..9004cc9 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/hpa.h" static void diff --git a/src/pac.c b/src/pac.c index 176b181..e53de80 100644 --- a/src/pac.c +++ b/src/pac.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/pac.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); @@ -146,7 +146,7 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (edata != NULL) { /* Add guards around it. */ assert(edata_size_get(edata) == size_with_guards); - guard_pages(tsdn, ehooks, edata, pac->emap); + san_guard_pages(tsdn, ehooks, edata, pac->emap); } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -253,7 +253,7 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, if (!edata_slab_get(edata) || !maps_coalesce) { assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || !maps_coalesce); - unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages(tsdn, ehooks, edata, pac->emap); } } diff --git a/src/san.c b/src/san.c new file mode 100644 index 0000000..139ec5a --- /dev/null +++ b/src/san.c @@ -0,0 +1,87 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/tsd.h" + +/* The sanitizer options. 
*/ +size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; +size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; + +void +san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + emap_deregister_boundary(tsdn, emap, edata); + + size_t size_with_guards = edata_size_get(edata); + size_t usize = size_with_guards - PAGE_GUARDS_SIZE; + + uintptr_t guard1 = (uintptr_t)edata_base_get(edata); + uintptr_t addr = guard1 + PAGE; + uintptr_t guard2 = addr + usize; + + assert(edata_state_get(edata) == extent_state_active); + ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the guarded addr and usable size of the edata. */ + edata_size_set(edata, usize); + edata_addr_set(edata, (void *)addr); + edata_guarded_set(edata, true); + + /* The new boundary will be registered on the pa_alloc path. */ +} + +static void +san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap, bool reg_emap) { + /* Remove the inner boundary which no longer exists. */ + if (reg_emap) { + assert(edata_state_get(edata) == extent_state_active); + emap_deregister_boundary(tsdn, emap, edata); + } else { + assert(edata_state_get(edata) == extent_state_retained); + } + + size_t size = edata_size_get(edata); + size_t size_with_guards = size + PAGE_GUARDS_SIZE; + + uintptr_t addr = (uintptr_t)edata_base_get(edata); + uintptr_t guard1 = addr - PAGE; + uintptr_t guard2 = addr + size; + + ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the true addr and usable size of the edata. */ + edata_size_set(edata, size_with_guards); + edata_addr_set(edata, (void *)guard1); + edata_guarded_set(edata, false); + + /* + * Then re-register the outer boundary including the guards, if + * requested. 
+ */ + if (reg_emap) { + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, + /* slab */ false); + } +} + +void +san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); +} + +void +san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + emap_assert_not_mapped(tsdn, emap, edata); + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); +} + +void +tsd_san_init(tsd_t *tsd) { + *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; + *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; +} diff --git a/src/tsd.c b/src/tsd.c index 31ff2f2..4859048 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" diff --git a/test/include/test/guard.h b/test/include/test/guard.h deleted file mode 100644 index 691dc50..0000000 --- a/test/include/test/guard.h +++ /dev/null @@ -1,6 +0,0 @@ -static inline bool -extent_is_guarded(tsdn_t *tsdn, void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - return edata_guarded_get(edata); -} - diff --git a/test/include/test/san.h b/test/include/test/san.h new file mode 100644 index 0000000..691dc50 --- /dev/null +++ b/test/include/test/san.h @@ -0,0 +1,6 @@ +static inline bool +extent_is_guarded(tsdn_t *tsdn, void *ptr) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + return edata_guarded_get(edata); +} + diff --git a/test/unit/double_free.c b/test/unit/double_free.c index f98484c..12122c1 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/guard.h" +#include "test/san.h" #include "jemalloc/internal/safety_check.h" diff --git a/test/unit/guard.c b/test/unit/guard.c deleted file mode 100644 index 43381e4..0000000 --- a/test/unit/guard.c +++ /dev/null @@ -1,201 +0,0 @@ -#include "test/jemalloc_test.h" -#include "test/arena_decay.h" -#include "test/guard.h" - -#include "jemalloc/internal/guard.h" - -static void -verify_extent_guarded(tsdn_t *tsdn, void *ptr) { - expect_true(extent_is_guarded(tsdn, ptr), - "All extents should be guarded."); -} - -#define MAX_SMALL_ALLOCATIONS 4096 -void *small_alloc[MAX_SMALL_ALLOCATIONS]; - -TEST_BEGIN(test_guarded_small) { - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - unsigned npages = 16, pages_found = 0, ends_found = 0; - VARIABLE_ARRAY(uintptr_t, pages, npages); - - /* Allocate to get sanitized pointers. */ - size_t sz = PAGE / 8; - unsigned n_alloc = 0; - while (n_alloc < MAX_SMALL_ALLOCATIONS) { - void *ptr = malloc(sz); - expect_ptr_not_null(ptr, "Unexpected malloc() failure"); - small_alloc[n_alloc] = ptr; - verify_extent_guarded(tsdn, ptr); - if ((uintptr_t)ptr % PAGE == 0) { - pages[pages_found++] = (uintptr_t)ptr; - } - if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) { - ends_found++; - } - n_alloc++; - if (pages_found == npages && ends_found == npages) { - break; - } - } - /* Should found the ptrs being checked for overflow and underflow. */ - expect_u_eq(pages_found, npages, "Could not found the expected pages."); - expect_u_eq(ends_found, npages, "Could not found the expected pages."); - - /* Verify the pages are not continuous, i.e. separated by guards. 
*/ - for (unsigned i = 0; i < npages - 1; i++) { - for (unsigned j = i + 1; j < npages; j++) { - uintptr_t ptr_diff = pages[i] > pages[j] ? - pages[i] - pages[j] : pages[j] - pages[i]; - expect_zu_gt((size_t)ptr_diff, 2 * PAGE, - "Pages should not be next to each other."); - } - } - - for (unsigned i = 0; i < n_alloc + 1; i++) { - free(small_alloc[i]); - } -} -TEST_END - -TEST_BEGIN(test_guarded_large) { - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - unsigned nlarge = 32; - VARIABLE_ARRAY(uintptr_t, large, nlarge); - - /* Allocate to get sanitized pointers. */ - size_t large_sz = SC_LARGE_MINCLASS; - for (unsigned i = 0; i < nlarge; i++) { - void *ptr = malloc(large_sz); - verify_extent_guarded(tsdn, ptr); - expect_ptr_not_null(ptr, "Unexpected malloc() failure"); - large[i] = (uintptr_t)ptr; - } - - /* Verify the pages are not continuous, i.e. separated by guards. */ - uintptr_t min_diff = (uintptr_t)-1; - for (unsigned i = 0; i < nlarge; i++) { - for (unsigned j = i + 1; j < nlarge; j++) { - uintptr_t ptr_diff = large[i] > large[j] ? - large[i] - large[j] : large[j] - large[i]; - expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); - if (ptr_diff < min_diff) { - min_diff = ptr_diff; - } - } - } - expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); - - for (unsigned i = 0; i < nlarge; i++) { - free((void *)large[i]); - } -} -TEST_END - -static void -verify_pdirty(unsigned arena_ind, uint64_t expected) { - uint64_t pdirty = get_arena_pdirty(arena_ind); - expect_u64_eq(pdirty, expected / PAGE, - "Unexpected dirty page amount."); -} - -static void -verify_pmuzzy(unsigned arena_ind, uint64_t expected) { - uint64_t pmuzzy = get_arena_pmuzzy(arena_ind); - expect_u64_eq(pmuzzy, expected / PAGE, - "Unexpected muzzy page amount."); -} - -TEST_BEGIN(test_guarded_decay) { - unsigned arena_ind = do_arena_create(-1, -1); - do_decay(arena_ind); - do_purge(arena_ind); - - verify_pdirty(arena_ind, 0); - verify_pmuzzy(arena_ind, 0); - - /* Verify that guarded extents as dirty. */ - size_t sz1 = PAGE, sz2 = PAGE * 2; - /* W/o maps_coalesce, guarded extents are unguarded eagerly. */ - size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE; - generate_dirty(arena_ind, sz1); - verify_pdirty(arena_ind, sz1 + add_guard_size); - verify_pmuzzy(arena_ind, 0); - - /* Should reuse the first extent. */ - generate_dirty(arena_ind, sz1); - verify_pdirty(arena_ind, sz1 + add_guard_size); - verify_pmuzzy(arena_ind, 0); - - /* Should not reuse; expect new dirty pages. */ - generate_dirty(arena_ind, sz2); - verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); - verify_pmuzzy(arena_ind, 0); - - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - - /* Should reuse dirty extents for the two mallocx. 
*/ - void *p1 = do_mallocx(sz1, flags); - verify_extent_guarded(tsdn, p1); - verify_pdirty(arena_ind, sz2 + add_guard_size); - - void *p2 = do_mallocx(sz2, flags); - verify_extent_guarded(tsdn, p2); - verify_pdirty(arena_ind, 0); - verify_pmuzzy(arena_ind, 0); - - dallocx(p1, flags); - verify_pdirty(arena_ind, sz1 + add_guard_size); - dallocx(p2, flags); - verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); - verify_pmuzzy(arena_ind, 0); - - do_purge(arena_ind); - verify_pdirty(arena_ind, 0); - verify_pmuzzy(arena_ind, 0); - - if (config_stats) { - expect_u64_eq(get_arena_npurge(arena_ind), 1, - "Expected purging to occur"); - expect_u64_eq(get_arena_dirty_npurge(arena_ind), 1, - "Expected purging to occur"); - expect_u64_eq(get_arena_dirty_purged(arena_ind), - (sz1 + sz2 + 2 * add_guard_size) / PAGE, - "Expected purging to occur"); - expect_u64_eq(get_arena_muzzy_npurge(arena_ind), 0, - "Expected purging to occur"); - } - - if (opt_retain) { - /* - * With retain, guarded extents are not mergable and will be - * cached in ecache_retained. They should be reused. - */ - void *new_p1 = do_mallocx(sz1, flags); - verify_extent_guarded(tsdn, p1); - expect_ptr_eq(p1, new_p1, "Expect to reuse p1"); - - void *new_p2 = do_mallocx(sz2, flags); - verify_extent_guarded(tsdn, p2); - expect_ptr_eq(p2, new_p2, "Expect to reuse p2"); - - dallocx(new_p1, flags); - verify_pdirty(arena_ind, sz1 + add_guard_size); - dallocx(new_p2, flags); - verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); - verify_pmuzzy(arena_ind, 0); - } - - do_arena_destroy(arena_ind); -} -TEST_END - -int -main(void) { - return test( - test_guarded_small, - test_guarded_large, - test_guarded_decay); -} diff --git a/test/unit/guard.sh b/test/unit/guard.sh deleted file mode 100644 index 933b4a4..0000000 --- a/test/unit/guard.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="san_guard_large:1,san_guard_small:1" diff --git a/test/unit/retained.c b/test/unit/retained.c index 53cda28..76bda50 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/spin.h" static unsigned arena_ind; diff --git a/test/unit/san.c b/test/unit/san.c new file mode 100644 index 0000000..1baa26e --- /dev/null +++ b/test/unit/san.c @@ -0,0 +1,201 @@ +#include "test/jemalloc_test.h" +#include "test/arena_decay.h" +#include "test/san.h" + +#include "jemalloc/internal/san.h" + +static void +verify_extent_guarded(tsdn_t *tsdn, void *ptr) { + expect_true(extent_is_guarded(tsdn, ptr), + "All extents should be guarded."); +} + +#define MAX_SMALL_ALLOCATIONS 4096 +void *small_alloc[MAX_SMALL_ALLOCATIONS]; + +TEST_BEGIN(test_guarded_small) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + unsigned npages = 16, pages_found = 0, ends_found = 0; + VARIABLE_ARRAY(uintptr_t, pages, npages); + + /* Allocate to get sanitized pointers. */ + size_t sz = PAGE / 8; + unsigned n_alloc = 0; + while (n_alloc < MAX_SMALL_ALLOCATIONS) { + void *ptr = malloc(sz); + expect_ptr_not_null(ptr, "Unexpected malloc() failure"); + small_alloc[n_alloc] = ptr; + verify_extent_guarded(tsdn, ptr); + if ((uintptr_t)ptr % PAGE == 0) { + pages[pages_found++] = (uintptr_t)ptr; + } + if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) { + ends_found++; + } + n_alloc++; + if (pages_found == npages && ends_found == npages) { + break; + } + } + /* Should found the ptrs being checked for overflow and underflow. 
*/ + expect_u_eq(pages_found, npages, "Could not found the expected pages."); + expect_u_eq(ends_found, npages, "Could not found the expected pages."); + + /* Verify the pages are not continuous, i.e. separated by guards. */ + for (unsigned i = 0; i < npages - 1; i++) { + for (unsigned j = i + 1; j < npages; j++) { + uintptr_t ptr_diff = pages[i] > pages[j] ? + pages[i] - pages[j] : pages[j] - pages[i]; + expect_zu_gt((size_t)ptr_diff, 2 * PAGE, + "Pages should not be next to each other."); + } + } + + for (unsigned i = 0; i < n_alloc + 1; i++) { + free(small_alloc[i]); + } +} +TEST_END + +TEST_BEGIN(test_guarded_large) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + unsigned nlarge = 32; + VARIABLE_ARRAY(uintptr_t, large, nlarge); + + /* Allocate to get sanitized pointers. */ + size_t large_sz = SC_LARGE_MINCLASS; + for (unsigned i = 0; i < nlarge; i++) { + void *ptr = malloc(large_sz); + verify_extent_guarded(tsdn, ptr); + expect_ptr_not_null(ptr, "Unexpected malloc() failure"); + large[i] = (uintptr_t)ptr; + } + + /* Verify the pages are not continuous, i.e. separated by guards. */ + uintptr_t min_diff = (uintptr_t)-1; + for (unsigned i = 0; i < nlarge; i++) { + for (unsigned j = i + 1; j < nlarge; j++) { + uintptr_t ptr_diff = large[i] > large[j] ? + large[i] - large[j] : large[j] - large[i]; + expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, + "Pages should not be next to each other."); + if (ptr_diff < min_diff) { + min_diff = ptr_diff; + } + } + } + expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE, + "Pages should not be next to each other."); + + for (unsigned i = 0; i < nlarge; i++) { + free((void *)large[i]); + } +} +TEST_END + +static void +verify_pdirty(unsigned arena_ind, uint64_t expected) { + uint64_t pdirty = get_arena_pdirty(arena_ind); + expect_u64_eq(pdirty, expected / PAGE, + "Unexpected dirty page amount."); +} + +static void +verify_pmuzzy(unsigned arena_ind, uint64_t expected) { + uint64_t pmuzzy = get_arena_pmuzzy(arena_ind); + expect_u64_eq(pmuzzy, expected / PAGE, + "Unexpected muzzy page amount."); +} + +TEST_BEGIN(test_guarded_decay) { + unsigned arena_ind = do_arena_create(-1, -1); + do_decay(arena_ind); + do_purge(arena_ind); + + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + /* Verify that guarded extents as dirty. */ + size_t sz1 = PAGE, sz2 = PAGE * 2; + /* W/o maps_coalesce, guarded extents are unguarded eagerly. */ + size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE; + generate_dirty(arena_ind, sz1); + verify_pdirty(arena_ind, sz1 + add_guard_size); + verify_pmuzzy(arena_ind, 0); + + /* Should reuse the first extent. */ + generate_dirty(arena_ind, sz1); + verify_pdirty(arena_ind, sz1 + add_guard_size); + verify_pmuzzy(arena_ind, 0); + + /* Should not reuse; expect new dirty pages. */ + generate_dirty(arena_ind, sz2); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + /* Should reuse dirty extents for the two mallocx. 
*/ + void *p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + verify_pdirty(arena_ind, sz2 + add_guard_size); + + void *p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + dallocx(p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + + do_purge(arena_ind); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + if (config_stats) { + expect_u64_eq(get_arena_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_purged(arena_ind), + (sz1 + sz2 + 2 * add_guard_size) / PAGE, + "Expected purging to occur"); + expect_u64_eq(get_arena_muzzy_npurge(arena_ind), 0, + "Expected purging to occur"); + } + + if (opt_retain) { + /* + * With retain, guarded extents are not mergable and will be + * cached in ecache_retained. They should be reused. + */ + void *new_p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + expect_ptr_eq(p1, new_p1, "Expect to reuse p1"); + + void *new_p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + expect_ptr_eq(p2, new_p2, "Expect to reuse p2"); + + dallocx(new_p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(new_p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + } + + do_arena_destroy(arena_ind); +} +TEST_END + +int +main(void) { + return test( + test_guarded_small, + test_guarded_large, + test_guarded_decay); +} diff --git a/test/unit/san.sh b/test/unit/san.sh new file mode 100644 index 0000000..933b4a4 --- /dev/null +++ b/test/unit/san.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="san_guard_large:1,san_guard_small:1" -- cgit v0.12 From 34b00f896966e3993b8570542dfe77c2002ce185 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 22 Oct 2021 17:23:09 -0700 Subject: San: Avoid running san tests with prof enabled With prof enabled, number of page aligned allocations doesn't match the number of slab "ends" because prof allocations skew the addresses. It leads to 'pages' array overflow and hard to debug failures. --- test/unit/san.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/san.c b/test/unit/san.c index 1baa26e..93e292f 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -14,6 +14,8 @@ verify_extent_guarded(tsdn_t *tsdn, void *ptr) { void *small_alloc[MAX_SMALL_ALLOCATIONS]; TEST_BEGIN(test_guarded_small) { + test_skip_if(opt_prof); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned npages = 16, pages_found = 0, ends_found = 0; VARIABLE_ARRAY(uintptr_t, pages, npages); @@ -27,6 +29,8 @@ TEST_BEGIN(test_guarded_small) { small_alloc[n_alloc] = ptr; verify_extent_guarded(tsdn, ptr); if ((uintptr_t)ptr % PAGE == 0) { + assert_u_lt(pages_found, npages, + "Unexpectedly large number of page aligned allocs"); pages[pages_found++] = (uintptr_t)ptr; } if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) { -- cgit v0.12 From 0f6da1257d7182777e47c78f47e0bb2aa28d259b Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 4 Nov 2021 11:10:19 -0700 Subject: San: Implement bump alloc The new allocator will be used to allocate guarded extents used as slabs for guarded small allocations. 
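The scheme, in outline: keep one large retained region, carve each request off its front, and leave a PROT_NONE guard page immediately after it, so a bump-allocated extent only ever owns its right guard. Below is a minimal, self-contained sketch of that pattern written against raw mmap()/mprotect(); PAGE_SZ, REGION_SZ and bump_arena_t are illustrative names rather than jemalloc symbols, and the real san_bump_alloc() goes through the extent/ehooks layer with locking, region reuse and destruction of the old region.

#include <assert.h>
#include <stddef.h>
#include <sys/mman.h>

#define PAGE_SZ		4096
#define REGION_SZ	(4 << 20)	/* analogous to SBA_RETAINED_ALLOC_SIZE */

typedef struct {
	char *cur;	/* next unused byte of the current region */
	char *end;	/* one past the end of the current region */
} bump_arena_t;

/* Return a page-aligned block whose trailing page is a PROT_NONE guard. */
static void *
bump_alloc_guarded(bump_arena_t *a, size_t size) {
	assert(size % PAGE_SZ == 0);
	size_t guarded = size + PAGE_SZ;	/* block plus one right guard */
	if (a->cur == NULL || (size_t)(a->end - a->cur) < guarded) {
		/*
		 * Current region cannot fit the request: map a fresh one.
		 * (The old region simply leaks in this sketch; the real
		 * allocator destroys it once the new region is in place.)
		 */
		size_t rsz = guarded > REGION_SZ ? guarded : REGION_SZ;
		void *p = mmap(NULL, rsz, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED) {
			return NULL;
		}
		a->cur = (char *)p;
		a->end = (char *)p + rsz;
	}
	void *ret = a->cur;
	/* Guard the page just past the block so overflows fault. */
	mprotect(a->cur + size, PAGE_SZ, PROT_NONE);
	a->cur += guarded;	/* bump past the block and its guard */
	return ret;
}
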
--- Makefile.in | 2 + include/jemalloc/internal/extent.h | 11 ++- include/jemalloc/internal/pac.h | 6 ++ include/jemalloc/internal/san.h | 48 +++++++++++-- include/jemalloc/internal/san_bump.h | 27 ++++++++ include/jemalloc/internal/witness.h | 1 + src/extent.c | 95 ++++++++++++++------------ src/pac.c | 15 ++--- src/pages.c | 56 +++++++++++---- src/san.c | 97 +++++++++++++++++++++----- src/san_bump.c | 127 +++++++++++++++++++++++++++++++++++ test/include/test/arena_decay.h | 28 ++++---- test/unit/retained.c | 2 +- test/unit/san.c | 2 +- test/unit/san_bump.c | 111 ++++++++++++++++++++++++++++++ 15 files changed, 521 insertions(+), 107 deletions(-) create mode 100644 include/jemalloc/internal/san_bump.h create mode 100644 src/san_bump.c create mode 100644 test/unit/san_bump.c diff --git a/Makefile.in b/Makefile.in index 8f96a99..50c586c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -120,6 +120,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ $(srcroot)src/san.c \ + $(srcroot)src/san_bump.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ @@ -220,6 +221,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ ${srcroot}test/unit/san.c \ + ${srcroot}test/unit/san_bump.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 73c5563..73059ad 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -30,14 +30,20 @@ void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); +void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata); +void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata); void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); + size_t offset, size_t length, bool growing_retained); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -45,7 +51,8 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, + bool holding_core_locks); bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); size_t extent_sn_next(pac_t *pac); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 5eee3de..7eaaf89 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/pai.h" +#include "san_bump.h" 
/* @@ -127,6 +128,11 @@ pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); } +static inline ehooks_t * +pac_ehooks_get(pac_t *pac) { + return base_ehooks_get(pac->base); +} + /* * All purging functions require holding decay->mtx. This is one of the few * places external modules are allowed to peek inside pa_shard_t internals. diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index b3d0304..70debf3 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -4,7 +4,8 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/emap.h" -#define PAGE_GUARDS_SIZE (2 * PAGE) +#define SAN_PAGE_GUARD PAGE +#define SAN_PAGE_GUARDS_SIZE (SAN_PAGE_GUARD * 2) #define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 #define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 @@ -14,9 +15,9 @@ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); + emap_t *emap, bool left, bool right, bool remap); void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); + emap_t *emap, bool left, bool right); /* * Unguard the extent, but don't modify emap boundaries. Must be called on an * extent that has been erased from emap and shouldn't be placed back. @@ -25,6 +26,45 @@ void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); void tsd_san_init(tsd_t *tsd); +static inline void +san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap, bool remap) { + return san_guard_pages(tsdn, ehooks, edata, emap, true, true, + remap); +} + +static inline void +san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + return san_unguard_pages(tsdn, ehooks, edata, emap, true, true); +} + +static inline size_t +san_two_side_unguarded_sz(size_t size) { + assert(size % PAGE == 0); + assert(size >= SAN_PAGE_GUARDS_SIZE); + return size - SAN_PAGE_GUARDS_SIZE; +} + +static inline size_t +san_two_side_guarded_sz(size_t size) { + assert(size % PAGE == 0); + return size + SAN_PAGE_GUARDS_SIZE; +} + +static inline size_t +san_one_side_unguarded_sz(size_t size) { + assert(size % PAGE == 0); + assert(size >= SAN_PAGE_GUARD); + return size - SAN_PAGE_GUARD; +} + +static inline size_t +san_one_side_guarded_sz(size_t size) { + assert(size % PAGE == 0); + return size + SAN_PAGE_GUARD; +} + static inline bool san_enabled(void) { return (opt_san_guard_large != 0 || opt_san_guard_small != 0); @@ -50,7 +90,7 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, } if (n == 1 && (alignment <= PAGE) && - (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { + (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; return true; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h new file mode 100644 index 0000000..9c6c224 --- /dev/null +++ b/include/jemalloc/internal/san_bump.h @@ -0,0 +1,27 @@ +#ifndef JEMALLOC_INTERNAL_SAN_BUMP_H +#define JEMALLOC_INTERNAL_SAN_BUMP_H + +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/mutex.h" + +extern const size_t SBA_RETAINED_ALLOC_SIZE; + +typedef struct ehooks_s ehooks_t; +typedef struct pac_s pac_t; + +typedef struct san_bump_alloc_s san_bump_alloc_t; +struct san_bump_alloc_s { + malloc_mutex_t mtx; + + edata_t 
*curr_reg; +}; + +bool +san_bump_alloc_init(san_bump_alloc_t* sba); + +edata_t * +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, + size_t size, bool zero); + +#endif /* JEMALLOC_INTERNAL_SAN_BUMP_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index c12a705..e81b9a0 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -48,6 +48,7 @@ enum witness_rank_e { WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_SAN_BUMP_ALLOC = WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_EXTENTS, WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, diff --git a/src/extent.c b/src/extent.c index 7112d3a..13d688d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -40,13 +40,9 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zero, bool *commit, bool growing_retained, bool guarded); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced); -static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); -static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); /******************************************************************************/ @@ -127,7 +123,8 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr = (expand_edata == NULL) ? NULL : edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, - size, alignment, zero, &commit); + size, alignment, zero, &commit, + /* growing_retained */ false); } assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); @@ -270,7 +267,7 @@ extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } -static void +void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. */ @@ -785,35 +782,6 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } -static edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); - if (edata == NULL) { - return NULL; - } - size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - &zero, commit); - if (addr == NULL) { - edata_cache_put(tsdn, pac->edata_cache, edata); - return NULL; - } - edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zero, *commit, EXTENT_PAI_PAC, - opt_retain ? 
EXTENT_IS_HEAD : EXTENT_NOT_HEAD); - if (extent_register(tsdn, pac, edata)) { - edata_cache_put(tsdn, pac->edata_cache, edata); - return NULL; - } - - return edata; -} - static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward) { @@ -924,9 +892,9 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces and inserts into the eset). */ -static void -extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata) { +void +extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1001,6 +969,42 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return err; } +edata_t * +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); + if (edata == NULL) { + return NULL; + } + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, + &zero, commit); + if (addr == NULL) { + edata_cache_put(tsdn, pac->edata_cache, edata); + return NULL; + } + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, + size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), + extent_state_active, zero, *commit, EXTENT_PAI_PAC, + opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); + /* + * Retained memory is not counted towards gdump. Only if an extent is + * allocated as a separate mapping, i.e. growing_retained is false, then + * gdump should be updated. + */ + bool gdump_add = !growing_retained; + if (extent_register_impl(tsdn, pac, edata, gdump_add)) { + edata_cache_put(tsdn, pac->edata_cache, edata); + return NULL; + } + + return edata; +} + void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { @@ -1013,7 +1017,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Restore guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - san_unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_two_sided(tsdn, ehooks, edata, + pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1057,12 +1062,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(edata_state_get(edata) == extent_state_retained); + extent_state_t state = edata_state_get(edata); + assert(state == extent_state_retained || state == extent_state_active); assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { + assert(opt_retain); san_unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); } edata_addr_set(edata, edata_base_get(edata)); @@ -1087,9 +1094,9 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { + size_t offset, size_t length, bool growing_retained) { return extent_commit_impl(tsdn, ehooks, edata, offset, length, - false); + growing_retained); } bool @@ -1207,9 +1214,9 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, - size_t size_a, size_t size_b) { + size_t size_a, size_t size_b, bool holding_core_locks) { return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - /* holding_core_locks */ false); + holding_core_locks); } static bool diff --git a/src/pac.c b/src/pac.c index e53de80..914cec9 100644 --- a/src/pac.c +++ b/src/pac.c @@ -14,11 +14,6 @@ static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); -static ehooks_t * -pac_ehooks_get(pac_t *pac) { - return base_ehooks_get(pac->base); -} - static inline void pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { @@ -139,14 +134,15 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero) { assert(alignment <= PAGE); - size_t size_with_guards = size + PAGE_GUARDS_SIZE; + size_t size_with_guards = size + SAN_PAGE_GUARDS_SIZE; /* Alloc a non-guarded extent first.*/ edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, /* alignment */ PAGE, zero, /* guarded */ false); if (edata != NULL) { /* Add guards around it. 
*/ assert(edata_size_get(edata) == size_with_guards); - san_guard_pages(tsdn, ehooks, edata, pac->emap); + san_guard_pages(tsdn, ehooks, edata, pac->emap, true, true, + true); } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -222,7 +218,7 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata, - new_size, shrink_amount); + new_size, shrink_amount, /* holding_core_locks */ false); if (trail == NULL) { return true; } @@ -253,7 +249,8 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, if (!edata_slab_get(edata) || !maps_coalesce) { assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || !maps_coalesce); - san_unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_two_sided(tsdn, ehooks, edata, + pac->emap); } } diff --git a/src/pages.c b/src/pages.c index a8d9988..8c83a7d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -365,33 +365,61 @@ pages_decommit(void *addr, size_t size) { void pages_mark_guards(void *head, void *tail) { - assert(head != NULL && tail != NULL); - assert((uintptr_t)head < (uintptr_t)tail); + assert(head != NULL || tail != NULL); + assert(head == NULL || tail == NULL || + (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - mprotect(head, PAGE, PROT_NONE); - mprotect(tail, PAGE, PROT_NONE); + if (head != NULL) { + mprotect(head, PAGE, PROT_NONE); + } + if (tail != NULL) { + mprotect(tail, PAGE, PROT_NONE); + } #else /* Decommit sets to PROT_NONE / MEM_DECOMMIT. */ - os_pages_commit(head, PAGE, false); - os_pages_commit(tail, PAGE, false); + if (head != NULL) { + os_pages_commit(head, PAGE, false); + } + if (tail != NULL) { + os_pages_commit(tail, PAGE, false); + } #endif } void pages_unmark_guards(void *head, void *tail) { - assert(head != NULL && tail != NULL); - assert((uintptr_t)head < (uintptr_t)tail); + assert(head != NULL || tail != NULL); + assert(head == NULL || tail == NULL || + (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - size_t range = (uintptr_t)tail - (uintptr_t)head + PAGE; - if (range <= SC_LARGE_MINCLASS) { + bool head_and_tail = (head != NULL) && (tail != NULL); + size_t range = head_and_tail ? + (uintptr_t)tail - (uintptr_t)head + PAGE : + SIZE_T_MAX; + /* + * The amount of work that the kernel does in mprotect depends on the + * range argument. SC_LARGE_MINCLASS is an arbitrary threshold chosen + * to prevent kernel from doing too much work that would outweigh the + * savings of performing one less system call. 
+ */ + bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS; + if (ranged_mprotect) { mprotect(head, range, PROT_READ | PROT_WRITE); } else { - mprotect(head, PAGE, PROT_READ | PROT_WRITE); - mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + if (head != NULL) { + mprotect(head, PAGE, PROT_READ | PROT_WRITE); + } + if (tail != NULL) { + mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + } } #else - os_pages_commit(head, PAGE, true); - os_pages_commit(tail, PAGE, true); + if (head != NULL) { + os_pages_commit(head, PAGE, true); + } + if (tail != NULL) { + os_pages_commit(tail, PAGE, true); + } #endif } diff --git a/src/san.c b/src/san.c index 139ec5a..15fdb7f 100644 --- a/src/san.c +++ b/src/san.c @@ -10,16 +10,63 @@ size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; +static inline void +san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, + uintptr_t *addr, size_t size, bool left, bool right) { + assert(!edata_guarded_get(edata)); + assert(size % PAGE == 0); + *addr = (uintptr_t)edata_base_get(edata); + if (left) { + *guard1 = *addr; + *addr += SAN_PAGE_GUARD; + } else { + *guard1 = 0; + } + + if (right) { + *guard2 = *addr + size; + } else { + *guard2 = 0; + } +} + +static inline void +san_find_unguarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, + uintptr_t *addr, size_t size, bool left, bool right) { + assert(edata_guarded_get(edata)); + assert(size % PAGE == 0); + *addr = (uintptr_t)edata_base_get(edata); + if (right) { + *guard2 = *addr + size; + } else { + *guard2 = 0; + } + + if (left) { + *guard1 = *addr - SAN_PAGE_GUARD; + assert(*guard1 != 0); + *addr = *guard1; + } else { + *guard1 = 0; + } +} + void -san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - emap_deregister_boundary(tsdn, emap, edata); +san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool left, bool right, bool remap) { + assert(left || right); + if (remap) { + emap_deregister_boundary(tsdn, emap, edata); + } size_t size_with_guards = edata_size_get(edata); - size_t usize = size_with_guards - PAGE_GUARDS_SIZE; + size_t usize = (left && right) + ? san_two_side_unguarded_sz(size_with_guards) + : san_one_side_unguarded_sz(size_with_guards); - uintptr_t guard1 = (uintptr_t)edata_base_get(edata); - uintptr_t addr = guard1 + PAGE; - uintptr_t guard2 = addr + usize; + uintptr_t guard1, guard2, addr; + san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, + right); assert(edata_state_get(edata) == extent_state_active); ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); @@ -29,14 +76,18 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { edata_addr_set(edata, (void *)addr); edata_guarded_set(edata, true); - /* The new boundary will be registered on the pa_alloc path. */ + if (remap) { + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, + /* slab */ false); + } } static void san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool reg_emap) { + emap_t *emap, bool left, bool right, bool remap) { + assert(left || right); /* Remove the inner boundary which no longer exists. 
*/ - if (reg_emap) { + if (remap) { assert(edata_state_get(edata) == extent_state_active); emap_deregister_boundary(tsdn, emap, edata); } else { @@ -44,24 +95,26 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } size_t size = edata_size_get(edata); - size_t size_with_guards = size + PAGE_GUARDS_SIZE; + size_t size_with_guards = (left && right) + ? san_two_side_guarded_sz(size) + : san_one_side_guarded_sz(size); - uintptr_t addr = (uintptr_t)edata_base_get(edata); - uintptr_t guard1 = addr - PAGE; - uintptr_t guard2 = addr + size; + uintptr_t guard1, guard2, addr; + san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, + right); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); /* Update the true addr and usable size of the edata. */ edata_size_set(edata, size_with_guards); - edata_addr_set(edata, (void *)guard1); + edata_addr_set(edata, (void *)addr); edata_guarded_set(edata, false); /* * Then re-register the outer boundary including the guards, if * requested. */ - if (reg_emap) { + if (remap) { emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); } @@ -69,15 +122,23 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); + emap_t *emap, bool left, bool right) { + san_unguard_pages_impl(tsdn, ehooks, edata, emap, left, right, + /* remap */ true); } void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_assert_not_mapped(tsdn, emap, edata); - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); + /* + * We don't want to touch the emap of about to be destroyed extents, as + * they have been unmapped upon eviction from the retained ecache. Also, + * we unguard the extents to the right, because retained extents only + * own their right guard page per san_bump_alloc's logic. 
+ */ + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, + /* right */ true, /* remap */ false); } void diff --git a/src/san_bump.c b/src/san_bump.c new file mode 100644 index 0000000..6098bd9 --- /dev/null +++ b/src/san_bump.c @@ -0,0 +1,127 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/san_bump.h" +#include "jemalloc/internal/pac.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/edata_cache.h" + +const size_t SBA_RETAINED_ALLOC_SIZE = 1024 * 1024 * 4; /* 4 MB */ + +static bool +san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size); + +bool +san_bump_alloc_init(san_bump_alloc_t* sba) { + bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", + WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + sba->curr_reg = NULL; + + return false; +} + +edata_t * +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, + ehooks_t *ehooks, size_t size, bool zero) { + assert(maps_coalesce && opt_retain); + + edata_t* to_destroy; + size_t guarded_size = san_one_side_guarded_sz(size); + + malloc_mutex_lock(tsdn, &sba->mtx); + + if (sba->curr_reg == NULL || + edata_size_get(sba->curr_reg) < guarded_size) { + /* + * If the current region can't accommodate the allocation, + * try replacing it with a larger one and destroy current if the + * replacement succeeds. + */ + to_destroy = sba->curr_reg; + bool err = san_bump_grow_locked(tsdn, sba, pac, ehooks, + guarded_size); + if (err) { + goto label_err; + } + } else { + to_destroy = NULL; + } + assert(guarded_size <= edata_size_get(sba->curr_reg)); + size_t trail_size = edata_size_get(sba->curr_reg) - guarded_size; + + edata_t* edata; + if (trail_size != 0) { + edata_t* curr_reg_trail = extent_split_wrapper(tsdn, pac, + ehooks, sba->curr_reg, guarded_size, trail_size, + /* holding_core_locks */ true); + if (curr_reg_trail == NULL) { + goto label_err; + } + edata = sba->curr_reg; + sba->curr_reg = curr_reg_trail; + } else { + edata = sba->curr_reg; + sba->curr_reg = NULL; + } + + malloc_mutex_unlock(tsdn, &sba->mtx); + + assert(!edata_guarded_get(edata)); + assert(sba->curr_reg == NULL || !edata_guarded_get(sba->curr_reg)); + assert(to_destroy == NULL || !edata_guarded_get(to_destroy)); + + if (to_destroy != NULL) { + extent_destroy_wrapper(tsdn, pac, ehooks, to_destroy); + } + + san_guard_pages(tsdn, ehooks, edata, pac->emap, /* left */ false, + /* right */ true, /* remap */ true); + + if (!edata_committed_get(edata)) { + if (extent_commit_wrapper(tsdn, ehooks, edata, 0, + edata_size_get(edata), true)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + edata); + return NULL; + } + edata_committed_set(edata, true); + } + if (zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + edata_zeroed_set(edata, true); + } + + if (config_prof) { + extent_gdump_add(tsdn, edata); + } + + return edata; +label_err: + malloc_mutex_unlock(tsdn, &sba->mtx); + return NULL; +} + +static bool +san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size) { + malloc_mutex_assert_owner(tsdn, &sba->mtx); + + bool committed = false, zeroed = false; + size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE ? 
size : + SBA_RETAINED_ALLOC_SIZE; + assert((alloc_size & PAGE_MASK) == 0); + sba->curr_reg = extent_alloc_wrapper(tsdn, pac, ehooks, NULL, + alloc_size, PAGE, zeroed, &committed, + /* growing_retained */ true); + if (sba->curr_reg == NULL) { + return true; + } + return false; +} diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_decay.h index da65921..524ee21 100644 --- a/test/include/test/arena_decay.h +++ b/test/include/test/arena_decay.h @@ -1,4 +1,4 @@ -static unsigned +static inline unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; size_t sz = sizeof(unsigned); @@ -24,7 +24,7 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { return arena_ind; } -static void +static inline void do_arena_destroy(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -35,14 +35,14 @@ do_arena_destroy(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static void +static inline void do_epoch(void) { uint64_t epoch = 1; expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } -static void +static inline void do_purge(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -53,7 +53,7 @@ do_purge(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static void +static inline void do_decay(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -64,7 +64,7 @@ do_decay(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static uint64_t +static inline uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -78,32 +78,32 @@ get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { return npurge; } -static uint64_t +static inline uint64_t get_arena_dirty_npurge(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); } -static uint64_t +static inline uint64_t get_arena_dirty_purged(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); } -static uint64_t +static inline uint64_t get_arena_muzzy_npurge(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } -static uint64_t +static inline uint64_t get_arena_npurge(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } -static size_t +static inline size_t get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; @@ -118,7 +118,7 @@ get_arena_pdirty(unsigned arena_ind) { return pdirty; } -static size_t +static inline size_t get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; @@ -133,14 +133,14 @@ get_arena_pmuzzy(unsigned arena_ind) { return pmuzzy; } -static void * +static inline void * do_mallocx(size_t size, int flags) { void *p = mallocx(size, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); return p; } -static void +static inline void generate_dirty(unsigned arena_ind, size_t size) { int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; void *p = do_mallocx(size, flags); diff --git a/test/unit/retained.c b/test/unit/retained.c index 76bda50..53c90f2 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -104,7 +104,7 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); 
- size_t guard_sz = san_enabled() ? PAGE_GUARDS_SIZE : 0; + size_t guard_sz = san_enabled() ? SAN_PAGE_GUARDS_SIZE : 0; esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); diff --git a/test/unit/san.c b/test/unit/san.c index 93e292f..eb9ff51 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -122,7 +122,7 @@ TEST_BEGIN(test_guarded_decay) { /* Verify that guarded extents as dirty. */ size_t sz1 = PAGE, sz2 = PAGE * 2; /* W/o maps_coalesce, guarded extents are unguarded eagerly. */ - size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE; + size_t add_guard_size = maps_coalesce ? 0 : SAN_PAGE_GUARDS_SIZE; generate_dirty(arena_ind, sz1); verify_pdirty(arena_ind, sz1 + add_guard_size); verify_pmuzzy(arena_ind, 0); diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c new file mode 100644 index 0000000..fbee53e --- /dev/null +++ b/test/unit/san_bump.c @@ -0,0 +1,111 @@ +#include "test/jemalloc_test.h" +#include "test/arena_decay.h" + +#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/san_bump.h" + +TEST_BEGIN(test_san_bump_alloc) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t alloc_size = PAGE * 16; + size_t alloc_n = alloc_size / sizeof(unsigned); + edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size, /* zero */ false); + + expect_ptr_not_null(edata, "Failed to allocate edata"); + expect_u_eq(edata_arena_ind_get(edata), arena_ind, + "Edata was assigned an incorrect arena id"); + expect_zu_eq(edata_size_get(edata), alloc_size, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata), "Edata is not committed"); + + void *ptr = edata_addr_get(edata); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + /* Test that memory is allocated; no guard pages are misplaced */ + for (unsigned i = 0; i < alloc_n; ++i) { + ((unsigned *)ptr)[i] = 1; + } + + size_t alloc_size2 = PAGE * 28; + size_t alloc_n2 = alloc_size / sizeof(unsigned); + edata_t *edata2 = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size2, /* zero */ true); + + expect_ptr_not_null(edata2, "Failed to allocate edata"); + expect_u_eq(edata_arena_ind_get(edata2), arena_ind, + "Edata was assigned an incorrect arena id"); + expect_zu_eq(edata_size_get(edata2), alloc_size2, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata2), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata2), "Edata is not committed"); + + void *ptr2 = edata_addr_get(edata2); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + + uintptr_t ptrdiff = ptr2 > ptr ? 
(uintptr_t)ptr2 - (uintptr_t)ptr + : (uintptr_t)ptr - (uintptr_t)ptr2; + size_t between_allocs = (size_t)ptrdiff - alloc_size; + + expect_zu_ge(between_allocs, PAGE, + "Guard page between allocs is missing"); + + for (unsigned i = 0; i < alloc_n2; ++i) { + expect_u_eq(((unsigned *)ptr2)[i], 0, "Memory is not zeroed"); + } +} +TEST_END + +TEST_BEGIN(test_large_alloc_size) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; + edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size, /* zero */ false); + expect_u_eq(edata_arena_ind_get(edata), arena_ind, + "Edata was assigned an incorrect arena id"); + expect_zu_eq(edata_size_get(edata), alloc_size, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata), "Edata is not committed"); + + void *ptr = edata_addr_get(edata); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + /* Test that memory is allocated; no guard pages are misplaced */ + for (unsigned i = 0; i < alloc_size / PAGE; ++i) { + *((char *)ptr + PAGE * i) = 1; + } +} +TEST_END + +int +main(void) { + return test( + test_san_bump_alloc, + test_large_alloc_size); +} -- cgit v0.12 From 2c70e8d3513edc5417a1fa6808350083e5c40f7d Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 25 Oct 2021 20:19:08 -0700 Subject: Rename 'arena_decay' to 'arena_util' While initially this file contained helper functions for one particular test, now its usage spread across different test files. Purpose has shifted towards a collection of handy arena ctl wrappers. 
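In practice the helpers compose into a create / dirty / purge / destroy sequence; a hypothetical test body built only from functions defined in this header might read as follows (test_arena_util_example is illustrative and not part of the suite).

/* Hypothetical usage of the arena_util wrappers. */
TEST_BEGIN(test_arena_util_example) {
	/* Decay disabled (-1, -1) so dirty pages stay until an explicit purge. */
	unsigned arena_ind = do_arena_create(-1, -1);

	/* Allocate and free one page, leaving dirty pages behind. */
	generate_dirty(arena_ind, PAGE);
	expect_zu_gt(get_arena_pdirty(arena_ind), 0,
	    "Expected dirty pages after an alloc/dalloc cycle");

	do_purge(arena_ind);
	expect_zu_eq(get_arena_pdirty(arena_ind), 0,
	    "Expected purge to flush all dirty pages");

	do_arena_destroy(arena_ind);
}
TEST_END
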
--- test/include/test/arena_decay.h | 149 ---------------------------------------- test/include/test/arena_util.h | 149 ++++++++++++++++++++++++++++++++++++++++ test/unit/arena_decay.c | 2 +- test/unit/san.c | 2 +- test/unit/san_bump.c | 2 +- 5 files changed, 152 insertions(+), 152 deletions(-) delete mode 100644 test/include/test/arena_decay.h create mode 100644 test/include/test/arena_util.h diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_decay.h deleted file mode 100644 index 524ee21..0000000 --- a/test/include/test/arena_decay.h +++ /dev/null @@ -1,149 +0,0 @@ -static inline unsigned -do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - return arena_ind; -} - -static inline void -do_arena_destroy(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static inline void -do_epoch(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); -} - -static inline void -do_purge(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static inline void -do_decay(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static inline uint64_t -get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - uint64_t npurge = 0; - size_t sz = sizeof(npurge); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), - config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); - return npurge; -} - -static inline uint64_t -get_arena_dirty_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); -} - -static inline uint64_t -get_arena_dirty_purged(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); -} - -static inline uint64_t -get_arena_muzzy_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static inline uint64_t -get_arena_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static inline size_t -get_arena_pdirty(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pdirty; - size_t sz = sizeof(pdirty); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pdirty; -} - -static inline size_t -get_arena_pmuzzy(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pmuzzy; - size_t sz = sizeof(pmuzzy); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pmuzzy; -} - -static inline void * -do_mallocx(size_t size, int flags) { - void *p = mallocx(size, flags); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - return p; -} - -static inline void -generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - void *p = do_mallocx(size, flags); - dallocx(p, flags); -} - diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h new file mode 100644 index 0000000..524ee21 --- /dev/null +++ b/test/include/test/arena_util.h @@ -0,0 +1,149 @@ +static inline unsigned +do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + return arena_ind; +} + +static inline void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 
0, + "Unexpected mallctlbymib() failure"); +} + +static inline void +do_epoch(void) { + uint64_t epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +static inline void +do_purge(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static inline void +do_decay(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static inline uint64_t +get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + uint64_t npurge = 0; + size_t sz = sizeof(npurge); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); + return npurge; +} + +static inline uint64_t +get_arena_dirty_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); +} + +static inline uint64_t +get_arena_dirty_purged(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); +} + +static inline uint64_t +get_arena_muzzy_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static inline uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static inline size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static inline size_t +get_arena_pmuzzy(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pmuzzy; + size_t sz = sizeof(pmuzzy); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pmuzzy; +} + +static inline void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static inline void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = do_mallocx(size, flags); + dallocx(p, flags); +} + diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 
bbfd23a..e991f4d 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "jemalloc/internal/ticker.h" diff --git a/test/unit/san.c b/test/unit/san.c index eb9ff51..0daa282 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "test/san.h" #include "jemalloc/internal/san.h" diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index fbee53e..cafa37f 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/san_bump.h" -- cgit v0.12 From f56f5b9930a46f919ae40b04acef8200fdd216e9 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 5 Nov 2021 14:19:39 -0700 Subject: Pass 'frequent_reuse' hint to PAI Currently used only for guarding purposes, the hint is used to determine if the allocation is supposed to be frequently reused. For example, it might urge the allocator to ensure the allocation is cached. --- include/jemalloc/internal/pai.h | 9 ++++--- src/hpa.c | 5 ++-- src/pa.c | 4 +-- src/pac.c | 6 +++-- src/pai.c | 3 ++- src/sec.c | 11 ++++++--- test/unit/hpa.c | 16 ++++++------ test/unit/sec.c | 54 ++++++++++++++++++++++------------------- 8 files changed, 60 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index f8f7d66..d978cd7 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,7 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. 
*/ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, + size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer @@ -37,10 +37,11 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated) { return self->alloc(tsdn, self, size, alignment, zero, guarded, - deferred_work_generated); + frequent_reuse, deferred_work_generated); } static inline size_t diff --git a/src/hpa.c b/src/hpa.c index caf122b..0a7ec19 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,7 +9,8 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -760,7 +761,7 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), diff --git a/src/pa.c b/src/pa.c index 9004cc9..0f95e93 100644 --- a/src/pa.c +++ b/src/pa.c @@ -128,7 +128,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero, /* guarded */ false, deferred_work_generated); + zero, /* guarded */ false, slab, deferred_work_generated); } /* * Fall back to the PAC if the HPA is off or couldn't serve the given @@ -136,7 +136,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ if (edata == NULL) { edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, - guarded, deferred_work_generated); + guarded, slab, deferred_work_generated); } if (edata != NULL) { assert(edata_size_get(edata) == size); diff --git a/src/pac.c b/src/pac.c index 914cec9..e1f6002 100644 --- a/src/pac.c +++ b/src/pac.c @@ -5,7 +5,8 @@ #include "jemalloc/internal/san.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -152,7 +153,8 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero, bool guarded, bool *deferred_work_generated) { + bool zero, bool guarded, bool frequent_reuse, + bool 
*deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); diff --git a/src/pai.c b/src/pai.c index 86b8ee5..45c8772 100644 --- a/src/pai.c +++ b/src/pai.c @@ -7,7 +7,8 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, + /* frequent_reuse */ false, &deferred_by_alloc); *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index d99c443..0c4e703 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -218,7 +219,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); @@ -227,7 +228,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - /* guarded */ false, deferred_work_generated); + /* guarded */ false, frequent_reuse, + deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -250,7 +252,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, /* guarded */ false, deferred_work_generated); + zero, /* guarded */ false, frequent_reuse, + deferred_work_generated); } } return edata; diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a63d51d..25ee195 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -81,10 +81,10 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -188,7 +188,7 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false, false, + npages * PAGE, PAGE, false, false, false, &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); @@ -264,7 +264,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, /* zero */ false, /* 
guarded */ false, - &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; @@ -300,8 +300,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { /* Reallocate (individually), and ensure reuse and contiguity. */ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -376,7 +376,7 @@ TEST_BEGIN(test_defer_time) { edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -410,7 +410,7 @@ TEST_BEGIN(test_defer_time) { */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* diff --git a/test/unit/sec.c b/test/unit/sec.c index 8ac3411..e98bdc9 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,7 +50,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, + size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; @@ -178,12 +178,12 @@ TEST_BEGIN(test_reuse) { /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -214,11 +214,11 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -255,12 +255,13 @@ TEST_BEGIN(test_auto_flush) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* 
zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -311,8 +312,8 @@ do_disable_flush_test(bool is_disable) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. */ @@ -386,7 +387,7 @@ TEST_BEGIN(test_max_alloc_respected) { "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, PAGE, /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); @@ -413,7 +414,7 @@ TEST_BEGIN(test_expand_shrink_delegate) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); @@ -454,7 +455,7 @@ TEST_BEGIN(test_nshards_0) { bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); @@ -497,8 +498,8 @@ TEST_BEGIN(test_stats_simple) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } @@ -512,6 +513,7 @@ TEST_BEGIN(test_stats_simple) { for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, + /* frequent_reuse */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } @@ -541,14 +543,16 @@ TEST_BEGIN(test_stats_auto_flush) { bool deferred_work_generated = false; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, 
&deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -588,8 +592,8 @@ TEST_BEGIN(test_stats_manual_flush) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } -- cgit v0.12 From 800ce49c19bc105199cf645172f1e462d70d77c4 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 28 Oct 2021 12:08:10 -0700 Subject: San: Bump alloc frequently reused guarded allocations To utilize a separate retained area for guarded extents, use bump alloc to allocate those extents. --- include/jemalloc/internal/emap.h | 1 + include/jemalloc/internal/extent.h | 1 + include/jemalloc/internal/pac.h | 3 +++ include/jemalloc/internal/san_bump.h | 31 ++++++++++++++++++++--- src/arena.c | 7 +++--- src/emap.c | 1 + src/extent.c | 32 ++++++++++++++++-------- src/pac.c | 48 +++++++++++++++++++++++++----------- src/san_bump.c | 16 +----------- test/unit/san.c | 22 +++++++++-------- 10 files changed, 106 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 87ece63..847af32 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -208,6 +208,7 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_committed_get(inner) == edata_committed_get(outer)); assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); + assert(!edata_guarded_get(inner) && !edata_guarded_get(outer)); assert(edata_base_get(inner) == edata_past_get(outer) || edata_base_get(outer) == edata_past_get(inner)); } diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 73059ad..1660f45 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -127,6 +127,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } } + assert(!edata_guarded_get(edata) && !edata_guarded_get(neighbor)); return true; } diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 7eaaf89..01c4e6a 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -99,6 +99,9 @@ struct pac_s { exp_grow_t exp_grow; malloc_mutex_t grow_mtx; + /* Special allocator for guarded frequently reused extents. */ + san_bump_alloc_t sba; + /* How large extents should be before getting auto-purged. */ atomic_zu_t oversize_threshold; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 9c6c224..8ec4a71 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -5,7 +5,9 @@ #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/mutex.h" -extern const size_t SBA_RETAINED_ALLOC_SIZE; +#define SBA_RETAINED_ALLOC_SIZE ((size_t)4 << 20) + +extern bool opt_retain; typedef struct ehooks_s ehooks_t; typedef struct pac_s pac_t; @@ -17,8 +19,31 @@ struct san_bump_alloc_s { edata_t *curr_reg; }; -bool -san_bump_alloc_init(san_bump_alloc_t* sba); +static inline bool +san_bump_enabled() { + /* + * We enable san_bump allocator only when it's possible to break up a + * mapping and unmap a part of it (maps_coalesce). 
This is needed to + * ensure the arena destruction process can destroy all retained guarded + * extents one by one and to unmap a trailing part of a retained guarded + * region when it's too small to fit a pending allocation. + * opt_retain is required, because this allocator retains a large + * virtual memory mapping and returns smaller parts of it. + */ + return maps_coalesce && opt_retain; +} + +static inline bool +san_bump_alloc_init(san_bump_alloc_t* sba) { + bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", + WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + sba->curr_reg = NULL; + + return false; +} edata_t * san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index 19e4e85..121832a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -328,8 +328,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; - bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), - esize, alignment); + bool guarded = san_large_extent_decide_guard(tsdn, + arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); assert(deferred_work_generated == false); @@ -829,7 +829,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = san_slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard(tsdn, + arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, guarded, &deferred_work_generated); diff --git a/src/emap.c b/src/emap.c index e37fea3..9cc95a7 100644 --- a/src/emap.c +++ b/src/emap.c @@ -44,6 +44,7 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, bool expanding) { witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + assert(!edata_guarded_get(edata)); assert(!expanding || forward); assert(!edata_state_in_transition(expected_state)); assert(expected_state == extent_state_dirty || diff --git a/src/extent.c b/src/extent.c index 13d688d..6fabcc7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -87,6 +87,7 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); + assert(edata == NULL || edata_guarded_get(edata) == guarded); return edata; } @@ -179,7 +180,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_return; } eset_remove(eset, edata); - if (!ecache->delay_coalesce) { + if (!ecache->delay_coalesce || edata_guarded_get(edata)) { break; } /* Try to coalesce. */ @@ -400,11 +401,6 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } } else { /* - * If split and merge are not allowed (Windows w/o retain), try - * exact fit only. - */ - bool exact_only = (!maps_coalesce && !opt_retain) || guarded; - /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. 
When that small * request is freed, though, it won't merge back with the larger @@ -415,7 +411,18 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); + + /* + * If split and merge are not allowed (Windows w/o retain), try + * exact fit only. + * + * For simplicity purposes, splitting guarded extents is not + * supported. Hence, we do only exact fit for guarded + * allocations. + */ + bool exact_only = (!maps_coalesce && !opt_retain) || guarded; + edata = eset_fit(eset, size, alignment, exact_only, + lg_max_fit); } if (edata == NULL) { return NULL; @@ -474,6 +481,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { + assert(!edata_guarded_get(*edata)); *lead = *edata; *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, size + trailsize, /* holding_core_locks*/ true); @@ -486,6 +494,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { + assert(!edata_guarded_get(*edata)); *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, trailsize, /* holding_core_locks */ true); if (*trail == NULL) { @@ -510,6 +519,7 @@ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { + assert(!edata_guarded_get(edata) || size == edata_size_get(edata)); malloc_mutex_assert_owner(tsdn, &ecache->mtx); edata_t *lead; @@ -576,8 +586,10 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(!guarded || expand_edata == NULL); + assert(!guarded || alignment <= PAGE); malloc_mutex_lock(tsdn, &ecache->mtx); + edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, guarded); if (edata == NULL) { @@ -746,7 +758,6 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); } - return edata; label_err: malloc_mutex_unlock(tsdn, &pac->grow_mtx); @@ -801,6 +812,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { + assert(!edata_guarded_get(edata)); /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -907,7 +919,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, goto label_skip_coalesce; } if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); @@ -1014,7 +1026,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { - /* Restore guard pages for dalloc / unmap. */ + /* Remove guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); san_unguard_pages_two_sided(tsdn, ehooks, edata, diff --git a/src/pac.c b/src/pac.c index e1f6002..c6d9f14 100644 --- a/src/pac.c +++ b/src/pac.c @@ -81,6 +81,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) { return true; } + if (san_bump_alloc_init(&pac->sba)) { + return true; + } pac->base = base; pac->emap = emap; @@ -132,18 +135,24 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, - size_t alignment, bool zero) { + size_t alignment, bool zero, bool frequent_reuse) { assert(alignment <= PAGE); - size_t size_with_guards = size + SAN_PAGE_GUARDS_SIZE; - /* Alloc a non-guarded extent first.*/ - edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, - /* alignment */ PAGE, zero, /* guarded */ false); - if (edata != NULL) { - /* Add guards around it. */ - assert(edata_size_get(edata) == size_with_guards); - san_guard_pages(tsdn, ehooks, edata, pac->emap, true, true, - true); + edata_t *edata; + if (san_bump_enabled() && frequent_reuse) { + edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size, + zero); + } else { + size_t size_with_guards = san_two_side_guarded_sz(size); + /* Alloc a non-guarded extent first.*/ + edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, + /* alignment */ PAGE, zero, /* guarded */ false); + if (edata != NULL) { + /* Add guards around it. */ + assert(edata_size_get(edata) == size_with_guards); + san_guard_pages_two_sided(tsdn, ehooks, edata, + pac->emap, true); + } } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -158,12 +167,21 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); - edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, - zero, guarded); + edata_t *edata = NULL; + /* + * The condition is an optimization - not frequently reused guarded + * allocations are never put in the ecache. pac_alloc_real also + * doesn't grow retained for guarded allocations. So pac_alloc_real + * for such allocations would always return NULL. + * */ + if (!guarded || frequent_reuse) { + edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, + zero, guarded); + } if (edata == NULL && guarded) { /* No cached guarded extents; creating a new one. 
*/ edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, - alignment, zero); + alignment, zero, frequent_reuse); } return edata; @@ -189,8 +207,8 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, edata, expand_amount, PAGE, - zero, /* guarded */ false); + &pac->ecache_retained, edata, expand_amount, PAGE, zero, + /* guarded */ false); mapped_add = expand_amount; } if (trail == NULL) { diff --git a/src/san_bump.c b/src/san_bump.c index 6098bd9..1a94e55 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -7,28 +7,14 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/edata_cache.h" -const size_t SBA_RETAINED_ALLOC_SIZE = 1024 * 1024 * 4; /* 4 MB */ - static bool san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size); -bool -san_bump_alloc_init(san_bump_alloc_t* sba) { - bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", - WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); - if (err) { - return true; - } - sba->curr_reg = NULL; - - return false; -} - edata_t * san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, size_t size, bool zero) { - assert(maps_coalesce && opt_retain); + assert(san_bump_enabled()); edata_t* to_destroy; size_t guarded_size = san_one_side_guarded_sz(size); diff --git a/test/unit/san.c b/test/unit/san.c index 0daa282..5b98f52 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -13,6 +13,11 @@ verify_extent_guarded(tsdn_t *tsdn, void *ptr) { #define MAX_SMALL_ALLOCATIONS 4096 void *small_alloc[MAX_SMALL_ALLOCATIONS]; +/* + * This test allocates page sized slabs and checks that every two slabs have + * at least one page in between them. That page is supposed to be the guard + * page. + */ TEST_BEGIN(test_guarded_small) { test_skip_if(opt_prof); @@ -21,7 +26,8 @@ TEST_BEGIN(test_guarded_small) { VARIABLE_ARRAY(uintptr_t, pages, npages); /* Allocate to get sanitized pointers. */ - size_t sz = PAGE / 8; + size_t slab_sz = PAGE; + size_t sz = slab_sz / 8; unsigned n_alloc = 0; while (n_alloc < MAX_SMALL_ALLOCATIONS) { void *ptr = malloc(sz); @@ -50,8 +56,9 @@ TEST_BEGIN(test_guarded_small) { for (unsigned j = i + 1; j < npages; j++) { uintptr_t ptr_diff = pages[i] > pages[j] ? pages[i] - pages[j] : pages[j] - pages[i]; - expect_zu_gt((size_t)ptr_diff, 2 * PAGE, - "Pages should not be next to each other."); + expect_zu_ge((size_t)ptr_diff, slab_sz + PAGE, + "There should be at least one pages between " + "guarded slabs"); } } @@ -76,20 +83,15 @@ TEST_BEGIN(test_guarded_large) { } /* Verify the pages are not continuous, i.e. separated by guards. */ - uintptr_t min_diff = (uintptr_t)-1; for (unsigned i = 0; i < nlarge; i++) { for (unsigned j = i + 1; j < nlarge; j++) { uintptr_t ptr_diff = large[i] > large[j] ? 
large[i] - large[j] : large[j] - large[i]; expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); - if (ptr_diff < min_diff) { - min_diff = ptr_diff; - } + "There should be at least two pages between " + " guarded large allocations"); } } - expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); for (unsigned i = 0; i < nlarge; i++) { free((void *)large[i]); -- cgit v0.12 From d90655390f5192d53723023667b57453ba23e676 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 22 Nov 2021 16:57:56 -0800 Subject: San: Create a function for committing and zeroing Committing and zeroing an extent is usually done together, hence a new function. --- include/jemalloc/internal/extent.h | 4 ++- src/extent.c | 52 ++++++++++++++++++++++++-------------- src/san_bump.c | 19 ++++---------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 1660f45..7336e8b 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -43,7 +43,7 @@ void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained); + size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -55,6 +55,8 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, bool holding_core_locks); bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); +bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained); size_t extent_sn_next(pac_t *pac); bool extent_boot(void); diff --git a/src/extent.c b/src/extent.c index 6fabcc7..4bbbff3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -604,27 +604,21 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, return NULL; } - if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), growing_retained)) { - extent_record(tsdn, pac, ehooks, ecache, edata); - return NULL; - } + assert(edata_state_get(edata) == extent_state_active); + if (extent_commit_zero(tsdn, ehooks, edata, *commit, zero, + growing_retained)) { + extent_record(tsdn, pac, ehooks, ecache, edata); + return NULL; } - if (edata_committed_get(edata)) { + /* + * This reverses the purpose of this variable - previously it + * was treated as an input parameter, now it turns into an + * output parameter, reporting if the edata has actually been + * committed. 
+ */ *commit = true; } - - assert(edata_state_get(edata) == extent_state_active); - - if (zero) { - void *addr = edata_base_get(edata); - if (!edata_zeroed_get(edata)) { - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - } - } return edata; } @@ -1106,9 +1100,9 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained) { + size_t offset, size_t length) { return extent_commit_impl(tsdn, ehooks, edata, offset, length, - growing_retained); + /* growing_retained */ false); } bool @@ -1288,6 +1282,26 @@ extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } bool +extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + + if (commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, ehooks, edata, 0, + edata_size_get(edata), growing_retained)) { + return true; + } + } + if (zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + } + return false; +} + +bool extent_boot(void) { assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); diff --git a/src/san_bump.c b/src/san_bump.c index 1a94e55..8889745 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -68,20 +68,11 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, san_guard_pages(tsdn, ehooks, edata, pac->emap, /* left */ false, /* right */ true, /* remap */ true); - if (!edata_committed_get(edata)) { - if (extent_commit_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata), true)) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - edata); - return NULL; - } - edata_committed_set(edata, true); - } - if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - edata_zeroed_set(edata, true); + if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true, zero, + /* growing_retained */ false)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + edata); + return NULL; } if (config_prof) { -- cgit v0.12 From 9015e129bd7de389afa4196495451669700904d0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkov Date: Mon, 13 Dec 2021 15:07:23 -0800 Subject: Update visual studio projects Add relevant source files to the projects. 
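Stepping back for a moment to the 'San: Create a function for committing and zeroing' change above: both call sites in that diff (extent_recycle() and san_bump_alloc()) end up with the same shape, so the intended calling pattern can be condensed into a short sketch. The wrapper name below is invented purely for illustration; the calls inside it are taken from the san_bump_alloc() variant in the diff:

    /*
     * Condensed from san_bump_alloc() after this change: commit the extent
     * if needed, then zero it if requested; on commit failure, retire the
     * extent back to the retained ecache.
     */
    static edata_t *
    commit_and_zero_or_retire(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
        edata_t *edata, bool zero) {
            if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true,
                zero, /* growing_retained */ false)) {
                    extent_record(tsdn, pac, ehooks, &pac->ecache_retained,
                        edata);
                    return NULL;
            }
            return edata;
    }

The helper returns true only if the commit step fails; zeroing goes through ehooks_zero() and is skipped when the extent is already known to be zeroed, which is precisely the logic the two call sites used to duplicate.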
--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 5 ++-- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 29 +++++++++++++++++++--- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 5 ++-- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 29 +++++++++++++++++++--- 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 75d6680..ec028a1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -59,7 +59,6 @@ - @@ -86,6 +85,8 @@ + + @@ -376,4 +377,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c5bb4cf..1b43e9f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -61,9 +61,6 @@ Source Files - - Source Files - Source Files @@ -172,5 +169,29 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index d25768e..a8004db 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -59,7 +59,6 @@ - @@ -86,6 +85,8 @@ + + @@ -375,4 +376,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c5bb4cf..1b43e9f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -61,9 +61,6 @@ Source Files - - Source Files - Source Files @@ -172,5 +169,29 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + - + \ No newline at end of file -- cgit v0.12 From bb5052ce90c6ad4b07c665d9ac96952de2f2b443 Mon Sep 17 00:00:00 2001 From: mweisgut Date: Fri, 17 Dec 2021 04:33:30 -0700 Subject: Fix base_ehooks_get_for_metadata --- include/jemalloc/internal/base.h | 5 +++-- src/base.c | 15 ++++++++------- test/unit/base.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 67e1940..9b2c9fb 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -47,9 +47,9 @@ struct base_s { ehooks_t ehooks; /* - * Use user hooks for metadata when true. + * User-configurable extent hook functions for metadata allocations. */ - bool metadata_use_hooks; + ehooks_t ehooks_base; /* Protects base_alloc() and base_stats_get() operations. 
*/ malloc_mutex_t mtx; @@ -95,6 +95,7 @@ base_t *base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, bool metadata_use_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); +ehooks_t *base_ehooks_get_for_metadata(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); diff --git a/src/base.c b/src/base.c index 38f6fa4..7f4d675 100644 --- a/src/base.c +++ b/src/base.c @@ -295,12 +295,6 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, return block; } -static ehooks_t * -base_ehooks_get_for_metadata(base_t *base) { - return base->metadata_use_hooks ? &base->ehooks : - (ehooks_t *)&ehooks_default_extent_hooks; -} - /* * Allocate an extent that is at least as large as specified size, with * specified alignment. @@ -375,6 +369,9 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); + ehooks_init(&base->ehooks_base, metadata_use_hooks ? + (extent_hooks_t *)extent_hooks : + (extent_hooks_t *)&ehooks_default_extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); @@ -384,7 +381,6 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base->extent_sn_next = extent_sn_next; base->blocks = block; base->auto_thp_switched = false; - base->metadata_use_hooks = metadata_use_hooks; for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } @@ -422,6 +418,11 @@ base_ehooks_get(base_t *base) { return &base->ehooks; } +ehooks_t * +base_ehooks_get_for_metadata(base_t *base) { + return &base->ehooks_base; +} + extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = diff --git a/test/unit/base.c b/test/unit/base.c index 07a43df..15e04a8 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -227,10 +227,39 @@ TEST_BEGIN(test_base_hooks_not_null) { } TEST_END +TEST_BEGIN(test_base_ehooks_get_for_metadata_default_hook) { + extent_hooks_prep(); + memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); + base_t *base; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ false); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + expect_true(ehooks_are_default(ehooks), + "Expected default extent hook functions pointer"); + base_delete(tsdn, base); +} +TEST_END + + +TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { + extent_hooks_prep(); + memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); + base_t *base; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + expect_ptr_eq(&hooks, ehooks_get_extent_hooks_ptr(ehooks), + "Expected user-specified extend hook functions pointer"); + base_delete(tsdn, base); +} +TEST_END + int main(void) { return test( test_base_hooks_default, test_base_hooks_null, - test_base_hooks_not_null); + test_base_hooks_not_null, + test_base_ehooks_get_for_metadata_default_hook, + test_base_ehooks_get_for_metadata_custom_hook); } -- cgit v0.12 From cafe9a315879b357ac3c6d00f3b7f9ad52c33087 Mon Sep 17 
00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 Dec 2021 21:00:21 +0300 Subject: Disable percpu arena in case of non-deterministic CPU count A deterministic number of CPUs is important for percpu arena to work correctly, since it uses the CPU index returned by sched_getcpu(), and if that index is greater than the number of CPUs, bad things will happen, or an assertion will fail in a debug build: : ../contrib/jemalloc/src/jemalloc.c:321: Failed assertion: "ind <= narenas_total_get()" Aborted (core dumped) The number of CPUs can be obtained from the following places: - sched_getaffinity() - sysconf(_SC_NPROCESSORS_ONLN) - sysconf(_SC_NPROCESSORS_CONF) For sched_getaffinity(), you may simply use taskset(1) to run the program on a different CPU, and if that CPU is not the first one, percpu will work incorrectly, i.e.: $ taskset --cpu-list $(( $(getconf _NPROCESSORS_ONLN)-1 )) _SC_NPROCESSORS_ONLN uses /sys/devices/system/cpu/online; LXD/LXC virtualize the /sys/devices/system/cpu/online file [1], so when you run a container with limited limits.cpus it will bind a randomly selected CPU to it. [1]: https://github.com/lxc/lxcfs/issues/301 _SC_NPROCESSORS_CONF uses /sys/devices/system/cpu/cpu*, and AFAIK nobody plays with dentries there. So if all three of these are equal, percpu arenas should work correctly. A small note regarding _SC_NPROCESSORS_ONLN/_SC_NPROCESSORS_CONF: musl uses sched_getaffinity() for both, so this will also increase the entropy. Also note that you can check whether the percpu arena was really applied using abort_conf:true. Refs: https://github.com/jemalloc/jemalloc/pull/1939 Refs: https://github.com/ClickHouse/ClickHouse/issues/32806 v2: move malloc_cpu_count_is_deterministic() into malloc_init_hard_recursible() since _SC_NPROCESSORS_CONF does allocations for readdir() v3: - mark cpu_count_is_deterministic static - check only if percpu arena is enabled - check narenas --- src/jemalloc.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index e707f9f..38f7036 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -148,6 +148,8 @@ unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; +/* ncpus is determinstinc, see malloc_cpu_count_is_deterministic() */ +static int cpu_count_is_deterministic = -1; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; @@ -741,6 +743,42 @@ malloc_ncpus(void) { return ((result == -1) ? 1 : (unsigned)result); } +/* + * Ensure that number of CPUs is determistinc, i.e. it is the same based on: + * - sched_getaffinity() + * - _SC_NPROCESSORS_ONLN + * - _SC_NPROCESSORS_CONF + * Since otherwise tricky things is possible with percpu arenas in use.
+ */ +static bool +malloc_cpu_count_is_deterministic() +{ +#ifdef _WIN32 + return true; +#else + long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); + long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_onln != cpu_conf) + return false; +# if defined(CPU_COUNT) +# if defined(__FreeBSD__) + cpuset_t set; +# else + cpu_set_t set; +# endif /* __FreeBSD__ */ +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + sched_getaffinity(0, sizeof(set), &set); +# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); +# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ + long cpu_affinity = CPU_COUNT(&set); + if (cpu_affinity != cpu_conf) + return false; +# endif /* CPU_COUNT */ + return true; +#endif +} + static void init_opt_stats_opts(const char *v, size_t vlen, char *dest) { size_t opts_len = strlen(dest); @@ -1833,6 +1871,7 @@ malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); + cpu_count_is_deterministic = malloc_cpu_count_is_deterministic(); #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ @@ -1892,7 +1931,22 @@ malloc_init_narenas(void) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { - if (!have_percpu_arena || malloc_getcpu() < 0) { + if (!cpu_count_is_deterministic) { + if (opt_narenas) { + malloc_write(": Number of CPUs is not deterministic, " + "but narenas is set. Hope you not what you are doing and " + "you have set narenas to largest possible CPU ID.\n"); + if (opt_abort) { + abort(); + } + } else { + opt_percpu_arena = percpu_arena_disabled; + if (opt_abort_conf) { + malloc_write(": Number of CPUs is not deterministic\n"); + malloc_abort_invalid_conf(); + } + } + } else if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? -- cgit v0.12 From 310af725b0037870f70bf6b94426249f69ca4441 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 20 Dec 2021 14:39:24 -0800 Subject: Add nstime_ns_since which obtains the duration since the input time. --- include/jemalloc/internal/nstime.h | 1 + src/nstime.c | 13 +++++++++++++ test/unit/nstime.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index e8315db..258b16e 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -35,6 +35,7 @@ void nstime_isubtract(nstime_t *time, uint64_t subtrahend); void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +uint64_t nstime_ns_since(const nstime_t *past); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; diff --git a/src/nstime.c b/src/nstime.c index 44419d2..a1a5377 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -158,6 +158,19 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } +/* Returns time since *past, w/o updating *past. 
*/ +uint64_t +nstime_ns_since(const nstime_t *past) { + nstime_assert_initialized(past); + + nstime_t now; + nstime_copy(&now, past); + nstime_update(&now); + + assert(nstime_compare(&now, past) >= 0); + return now.ns - past->ns; +} + #ifdef _WIN32 # define NSTIME_MONOTONIC true static void diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 083002b..56238ab 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -201,6 +201,33 @@ TEST_BEGIN(test_nstime_divide) { } TEST_END +void +test_nstime_since_once(nstime_t *t) { + nstime_t old_t; + nstime_copy(&old_t, t); + + uint64_t ns_since = nstime_ns_since(t); + nstime_update(t); + + nstime_t new_t; + nstime_copy(&new_t, t); + nstime_subtract(&new_t, &old_t); + + expect_u64_ge(nstime_ns(&new_t), ns_since, + "Incorrect time since result"); +} + +TEST_BEGIN(test_nstime_ns_since) { + nstime_t t; + + nstime_init_update(&t); + for (uint64_t i = 0; i < 10000; i++) { + /* Keeps updating t and verifies ns_since is valid. */ + test_nstime_since_once(&t); + } +} +TEST_END + TEST_BEGIN(test_nstime_monotonic) { nstime_monotonic(); } @@ -220,5 +247,6 @@ main(void) { test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_ns_since, test_nstime_monotonic); } -- cgit v0.12 From 837b37c4ce44a1c236e1657a6de80b064af98610 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 20 Dec 2021 15:04:12 -0800 Subject: Fix the time-since computation in HPA. nstime module guarantees monotonic clock update within a single nstime_t. This means, if two separate nstime_t variables are read and updated separately, nstime_subtract between them may result in underflow. Fixed by switching to the time since utility provided by nstime. --- include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 22 +++++++--------------- src/hpa_hooks.c | 7 +++++++ test/unit/hpa.c | 7 +++++++ 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 12e6b97..4ea221c 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -9,6 +9,7 @@ struct hpa_hooks_s { void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); + uint64_t (*ms_since)(nstime_t *r_time); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index 0a7ec19..7e2aeba 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -479,10 +479,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. */ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); - nstime_t nstime; - shard->central->hooks.curtime(&nstime, /* first_reading */ true); - nstime_subtract(&nstime, &time_hugify_allowed); - uint64_t millis = nstime_msec(&nstime); + uint64_t millis = shard->central->hooks.ms_since(&time_hugify_allowed); if (millis < shard->opts.hugify_delay_ms) { return false; } @@ -897,17 +894,15 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { if (to_hugify != NULL) { nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); - nstime_t nstime; - shard->central->hooks.curtime(&nstime, - /* first_reading */ true); - nstime_subtract(&nstime, &time_hugify_allowed); - uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); + uint64_t since_hugify_allowed_ms = + shard->central->hooks.ms_since(&time_hugify_allowed); /* * If not enough time has passed since hugification was allowed, * sleep for the rest. 
*/ if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { - time_ns = shard->opts.hugify_delay_ms - since_hugify_allowed_ms; + time_ns = shard->opts.hugify_delay_ms - + since_hugify_allowed_ms; time_ns *= 1000 * 1000; } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -924,11 +919,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { malloc_mutex_unlock(tsdn, &shard->mtx); return BACKGROUND_THREAD_DEFERRED_MIN; } - nstime_t nstime; - shard->central->hooks.curtime(&nstime, - /* first_reading */ true); - nstime_subtract(&nstime, &shard->last_purge); - uint64_t since_last_purge_ms = nstime_msec(&nstime); + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { uint64_t until_purge_ns; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 116592f..ade581e 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -9,6 +9,7 @@ static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); +static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -17,6 +18,7 @@ hpa_hooks_t hpa_hooks_default = { &hpa_hooks_hugify, &hpa_hooks_dehugify, &hpa_hooks_curtime, + &hpa_hooks_ms_since }; static void * @@ -54,3 +56,8 @@ hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { } nstime_update(r_nstime); } + +static uint64_t +hpa_hooks_ms_since(nstime_t *past_nstime) { + return nstime_ns_since(past_nstime) / 1000 / 1000; +} diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 25ee195..dfd57f3 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" #define SHARD_IND 111 @@ -353,6 +354,11 @@ defer_test_curtime(nstime_t *r_time, bool first_reading) { *r_time = defer_curtime; } +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + TEST_BEGIN(test_defer_time) { test_skip_if(!hpa_supported()); @@ -363,6 +369,7 @@ TEST_BEGIN(test_defer_time) { hooks.hugify = &defer_test_hugify; hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; -- cgit v0.12 From 60b9637cc0c5e88518d03e23de8538523757f060 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Dec 2021 15:43:30 -0800 Subject: Only invoke malloc_cpu_count_is_deterministic() when necessary. Also refactor the handling of the non-deterministic case. Notably allow the case with narenas set to proceed w/o warnings, to not affect existing valid use cases. --- src/jemalloc.c | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 38f7036..1893657 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -148,8 +148,6 @@ unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; -/* ncpus is determinstinc, see malloc_cpu_count_is_deterministic() */ -static int cpu_count_is_deterministic = -1; /* Protects arenas initialization. 
*/ malloc_mutex_t arenas_lock; @@ -758,10 +756,11 @@ malloc_cpu_count_is_deterministic() #else long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); - if (cpu_onln != cpu_conf) + if (cpu_onln != cpu_conf) { return false; + } # if defined(CPU_COUNT) -# if defined(__FreeBSD__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; @@ -772,8 +771,9 @@ malloc_cpu_count_is_deterministic() pthread_getaffinity_np(pthread_self(), sizeof(set), &set); # endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ long cpu_affinity = CPU_COUNT(&set); - if (cpu_affinity != cpu_conf) + if (cpu_affinity != cpu_conf) { return false; + } # endif /* CPU_COUNT */ return true; #endif @@ -1871,7 +1871,29 @@ malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); - cpu_count_is_deterministic = malloc_cpu_count_is_deterministic(); + if (opt_percpu_arena != percpu_arena_disabled) { + bool cpu_count_is_deterministic = + malloc_cpu_count_is_deterministic(); + if (!cpu_count_is_deterministic) { + /* + * If # of CPU is not deterministic, and narenas not + * specified, disables per cpu arena since it may not + * detect CPU IDs properly. + */ + if (opt_narenas == 0) { + opt_percpu_arena = percpu_arena_disabled; + malloc_write(": Number of CPUs " + "detected is not deterministic. Per-CPU " + "arena disabled.\n"); + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + if (opt_abort) { + abort(); + } + } + } + } #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ @@ -1931,22 +1953,7 @@ malloc_init_narenas(void) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { - if (!cpu_count_is_deterministic) { - if (opt_narenas) { - malloc_write(": Number of CPUs is not deterministic, " - "but narenas is set. Hope you not what you are doing and " - "you have set narenas to largest possible CPU ID.\n"); - if (opt_abort) { - abort(); - } - } else { - opt_percpu_arena = percpu_arena_disabled; - if (opt_abort_conf) { - malloc_write(": Number of CPUs is not deterministic\n"); - malloc_abort_invalid_conf(); - } - } - } else if (!have_percpu_arena || malloc_getcpu() < 0) { + if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? -- cgit v0.12 From e491df1d2f686a1ba47036301693285a72d98ca2 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Wed, 15 Dec 2021 10:49:01 -0600 Subject: Fix warnings when using autoheader. 
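The warnings presumably come from AC_DEFINE/AC_DEFINE_UNQUOTED
invocations that omit the third (description) argument, which
autoheader uses to build the config header template; without it,
autoheader complains about the definition. The change below is
mechanical: pass an explicitly empty description everywhere. A minimal
sketch of the pattern, mirroring the first hunk of the diff that
follows (not an additional change):

    dnl before: autoheader has no description to attach to the symbol
    AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
    dnl after: an empty value and description keep autoheader quiet
    AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ])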
--- configure.ac | 271 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 136 insertions(+), 135 deletions(-) diff --git a/configure.ac b/configure.ac index 22900ec..e18c0cc 100644 --- a/configure.ac +++ b/configure.ac @@ -237,11 +237,11 @@ fi if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-std=gnu11]) if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ]) else JE_CFLAGS_ADD([-std=gnu99]) if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ]) fi fi JE_CFLAGS_ADD([-Werror=unknown-warning-option]) @@ -326,7 +326,7 @@ if test "x$enable_cxx" = "x1" ; then fi fi if test "x$enable_cxx" = "x1"; then - AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ]) + AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ], [ ]) fi AC_SUBST([enable_cxx]) AC_SUBST([CONFIGURE_CXXFLAGS]) @@ -335,7 +335,7 @@ AC_SUBST([EXTRA_CXXFLAGS]) AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) if test "x${ac_cv_big_endian}" = "x1" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ], [ ]) fi if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then @@ -355,7 +355,7 @@ else AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) fi fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR], [ ]) AC_CHECK_SIZEOF([int]) if test "x${ac_cv_sizeof_int}" = "x8" ; then @@ -365,7 +365,7 @@ elif test "x${ac_cv_sizeof_int}" = "x4" ; then else AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT], [ ]) AC_CHECK_SIZEOF([long]) if test "x${ac_cv_sizeof_long}" = "x8" ; then @@ -375,7 +375,7 @@ elif test "x${ac_cv_sizeof_long}" = "x4" ; then else AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG], [ ]) AC_CHECK_SIZEOF([long long]) if test "x${ac_cv_sizeof_long_long}" = "x8" ; then @@ -385,7 +385,7 @@ elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then else AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG], [ ]) AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then @@ -397,7 +397,7 @@ elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then else AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T], [ ]) AC_CANONICAL_HOST dnl CPU-specific settings. 
@@ -437,8 +437,8 @@ case "${host_cpu}" in HAVE_CPU_SPINWAIT=0 ;; esac -AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT]) -AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT], [ ]) +AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT], [ ]) AC_ARG_WITH([lg_vaddr], [AS_HELP_STRING([--with-lg-vaddr=], [Number of significant virtual address bits])], @@ -503,7 +503,7 @@ typedef unsigned __int32 uint32_t; LG_VADDR="${je_cv_lg_vaddr}" fi if test "x${LG_VADDR}" != "xerror" ; then - AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) + AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) else AC_MSG_ERROR([cannot determine number of significant virtual address bits]) fi @@ -525,7 +525,7 @@ typedef unsigned __int32 uint32_t; fi ;; esac -AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) +AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" @@ -654,7 +654,7 @@ case "${host}" in *-*-freebsd*) JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) @@ -672,11 +672,11 @@ case "${host}" in JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" glibc="0" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_C11_ATOMICS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ]) force_tls="0" if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" @@ -687,11 +687,11 @@ case "${host}" in JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" glibc="1" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ]) if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi @@ -700,10 +700,10 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ]) ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) @@ -774,7 +774,7 @@ case "${host}" in *-*-nto-qnx) abi="elf" force_tls="0" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) @@ -797,7 +797,7 @@ AC_CHECK_HEADERS([malloc.h], [ AC_MSG_RESULT([no]) ]) ]) -AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST]) +AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST], [ ]) AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) @@ -835,7 +835,7 @@ JE_COMPILABLE([__attribute__ syntax], [], [je_cv_attribute]) if test "x${je_cv_attribute}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then JE_CFLAGS_ADD([-fvisibility=hidden]) JE_CXXFLAGS_ADD([-fvisibility=hidden]) @@ -863,7 +863,7 @@ JE_COMPILABLE([alloc_size attribute], [#include ], [je_cv_alloc_size]) JE_CFLAGS_RESTORE() if test "x${je_cv_alloc_size}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ], [ ]) fi dnl Check for format(gnu_printf, ...) attribute support. JE_CFLAGS_SAVE() @@ -874,7 +874,7 @@ JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [je_cv_format_gnu_printf]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_gnu_printf}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ], [ ]) fi dnl Check for format(printf, ...) attribute support. JE_CFLAGS_SAVE() @@ -885,7 +885,7 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include ], [je_cv_format_printf]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_printf}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ], [ ]) fi dnl Check for format_arg(...) attribute support. @@ -897,7 +897,7 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include ], [je_cv_format_arg]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_arg}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ], [ ]) fi dnl Check for fallthrough attribute support. @@ -915,7 +915,7 @@ JE_COMPILABLE([fallthrough attribute], [je_cv_fallthrough]) JE_CFLAGS_RESTORE() if test "x${je_cv_fallthrough}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ], [ ]) JE_CFLAGS_ADD([-Wimplicit-fallthrough]) JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) fi @@ -929,7 +929,7 @@ JE_COMPILABLE([cold attribute], [], [je_cv_cold]) JE_CFLAGS_RESTORE() if test "x${je_cv_cold}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ]) fi dnl Check for VM_MAKE_TAG for mmap support. 
@@ -941,7 +941,7 @@ JE_COMPILABLE([vm_make_tag], munmap(p, 16);], [je_cv_vm_make_tag]) if test "x${je_cv_vm_make_tag}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ]) + AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ], [ ]) fi dnl Support optional additions to rpath. @@ -1033,11 +1033,11 @@ else fi] ) if test "x$JEMALLOC_PREFIX" = "x" ; then - AC_DEFINE([JEMALLOC_IS_MALLOC]) + AC_DEFINE([JEMALLOC_IS_MALLOC], [ ], [ ]) else JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` - AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) + AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"], [ ]) fi AC_SUBST([JEMALLOC_PREFIX]) AC_SUBST([JEMALLOC_CPREFIX]) @@ -1045,45 +1045,45 @@ AC_SUBST([JEMALLOC_CPREFIX]) AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[]) + AC_DEFINE([JEMALLOC_EXPORT],[], [ ]) fi] ) public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ]) public_syms="${public_syms} memalign"]) AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ]) public_syms="${public_syms} valloc"]) AC_CHECK_FUNC([malloc_size], - [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ]) + [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ]) public_syms="${public_syms} malloc_size"]) dnl Check for allocator-related functions that should be wrapped. 
wrap_syms= if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_calloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_calloc"]) AC_CHECK_FUNC([__libc_free], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ]) wrap_syms="${wrap_syms} __libc_free"]) AC_CHECK_FUNC([__libc_malloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_malloc"]) AC_CHECK_FUNC([__libc_memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __libc_memalign"]) AC_CHECK_FUNC([__libc_realloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_realloc"]) AC_CHECK_FUNC([__libc_valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_valloc"]) AC_CHECK_FUNC([__posix_memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __posix_memalign"]) fi @@ -1101,7 +1101,7 @@ AC_ARG_WITH([private_namespace], [JEMALLOC_PRIVATE_NAMESPACE="${with_private_namespace}je_"], [JEMALLOC_PRIVATE_NAMESPACE="je_"] ) -AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE]) +AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE], [ ]) private_namespace="$JEMALLOC_PRIVATE_NAMESPACE" AC_SUBST([private_namespace]) @@ -1121,7 +1121,7 @@ AC_ARG_WITH([malloc_conf], [JEMALLOC_CONFIG_MALLOC_CONF=""] ) config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" -AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) +AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"], [ ]) dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. 
@@ -1210,7 +1210,7 @@ fi [enable_debug="0"] ) if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) + AC_DEFINE([JEMALLOC_DEBUG], [ ], [ ]) fi AC_SUBST([enable_debug]) @@ -1242,7 +1242,7 @@ fi [enable_stats="1"] ) if test "x$enable_stats" = "x1" ; then - AC_DEFINE([JEMALLOC_STATS], [ ]) + AC_DEFINE([JEMALLOC_STATS], [ ], [ ]) fi AC_SUBST([enable_stats]) @@ -1258,7 +1258,7 @@ fi [enable_experimental_smallocx="0"] ) if test "x$enable_experimental_smallocx" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API]) + AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API], [ ], [ ]) fi AC_SUBST([enable_experimental_smallocx]) @@ -1315,7 +1315,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then fi if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) + AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ], [ ]) fi fi @@ -1338,7 +1338,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ fi if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" - AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) + AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ], [ ]) fi else enable_prof_libgcc="0" @@ -1359,7 +1359,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ -a "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-fno-omit-frame-pointer]) backtrace_method="gcc intrinsics" - AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) + AC_DEFINE([JEMALLOC_PROF_GCC], [ ], [ ]) else enable_prof_gcc="0" fi @@ -1374,19 +1374,19 @@ if test "x$enable_prof" = "x1" ; then dnl Heap profiling uses the log(3) function. JE_APPEND_VS(LIBS, $LM) - AC_DEFINE([JEMALLOC_PROF], [ ]) + AC_DEFINE([JEMALLOC_PROF], [ ], [ ]) fi AC_SUBST([enable_prof]) dnl Indicate whether adjacent virtual memory mappings automatically coalesce dnl (and fragment on demand). if test "x${maps_coalesce}" = "x1" ; then - AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ]) + AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ], [ ]) fi dnl Indicate whether to retain memory (rather than using munmap()) by default. if test "x$default_retain" = "x1" ; then - AC_DEFINE([JEMALLOC_RETAIN], [ ]) + AC_DEFINE([JEMALLOC_RETAIN], [ ], [ ]) fi dnl Enable allocation from DSS if supported by the OS. @@ -1403,7 +1403,7 @@ else fi if test "x$have_dss" = "x1" ; then - AC_DEFINE([JEMALLOC_DSS], [ ]) + AC_DEFINE([JEMALLOC_DSS], [ ], [ ]) fi dnl Support the junk/zero filling option by default. 
@@ -1418,7 +1418,7 @@ fi [enable_fill="1"] ) if test "x$enable_fill" = "x1" ; then - AC_DEFINE([JEMALLOC_FILL], [ ]) + AC_DEFINE([JEMALLOC_FILL], [ ], [ ]) fi AC_SUBST([enable_fill]) @@ -1456,11 +1456,11 @@ if test "x${je_cv_utrace}" = "xno" ; then enable_utrace="0" fi if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ]) + AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ], [ ]) fi else if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE], [ ]) + AC_DEFINE([JEMALLOC_UTRACE], [ ], [ ]) fi fi AC_SUBST([enable_utrace]) @@ -1477,7 +1477,7 @@ fi [enable_xmalloc="0"] ) if test "x$enable_xmalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_XMALLOC], [ ]) + AC_DEFINE([JEMALLOC_XMALLOC], [ ], [ ]) fi AC_SUBST([enable_xmalloc]) @@ -1494,7 +1494,7 @@ fi [enable_cache_oblivious="1"] ) if test "x$enable_cache_oblivious" = "x1" ; then - AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ]) + AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ], [ ]) fi AC_SUBST([enable_cache_oblivious]) @@ -1510,7 +1510,7 @@ fi [enable_log="0"] ) if test "x$enable_log" = "x1" ; then - AC_DEFINE([JEMALLOC_LOG], [ ]) + AC_DEFINE([JEMALLOC_LOG], [ ], [ ]) fi AC_SUBST([enable_log]) @@ -1526,7 +1526,7 @@ fi [enable_readlinkat="0"] ) if test "x$enable_readlinkat" = "x1" ; then - AC_DEFINE([JEMALLOC_READLINKAT], [ ]) + AC_DEFINE([JEMALLOC_READLINKAT], [ ], [ ]) fi AC_SUBST([enable_readlinkat]) @@ -1543,7 +1543,7 @@ fi [enable_opt_safety_checks="0"] ) if test "x$enable_opt_safety_checks" = "x1" ; then - AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ]) + AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ], [ ]) fi AC_SUBST([enable_opt_safety_checks]) @@ -1560,7 +1560,7 @@ fi [enable_opt_size_checks="0"] ) if test "x$enable_opt_size_checks" = "x1" ; then - AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ]) + AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ], [ ]) fi AC_SUBST([enable_opt_size_checks]) @@ -1574,9 +1574,9 @@ void foo (void) { } ], [je_cv_gcc_builtin_unreachable]) if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable], [ ]) else - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort], [ ]) fi dnl ============================================================================ @@ -1596,9 +1596,9 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs], [ ]) else JE_COMPILABLE([a program using ffsl], [ #include @@ -1611,9 +1611,9 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs], [ ]) else AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) fi @@ -1630,16 +1630,16 @@ JE_COMPILABLE([a program using __builtin_popcountl], [ } ], [je_cv_gcc_builtin_popcountl]) if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; 
then - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount]) - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl]) - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll], [ ]) fi AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], [Base 2 log of minimum allocation alignment])]) if test "x$with_lg_quantum" != "x" ; then - AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) + AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum], [ ]) fi AC_ARG_WITH([lg_slab_maxregs], @@ -1648,7 +1648,7 @@ AC_ARG_WITH([lg_slab_maxregs], [CONFIG_LG_SLAB_MAXREGS="with_lg_slab_maxregs"], [CONFIG_LG_SLAB_MAXREGS=""]) if test "x$with_lg_slab_maxregs" != "x" ; then - AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) + AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs], [ ]) fi AC_ARG_WITH([lg_page], @@ -1700,7 +1700,7 @@ if test "x${je_cv_lg_page}" != "x" ; then LG_PAGE="${je_cv_lg_page}" fi if test "x${LG_PAGE}" != "xundefined" ; then - AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE]) + AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE], [ ]) else AC_MSG_ERROR([cannot determine value for LG_PAGE]) fi @@ -1737,7 +1737,7 @@ if test "x${LG_PAGE}" != "xundefined" -a \ "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})]) fi -AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) +AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}], [ ]) dnl ============================================================================ dnl Enable libdl by default. @@ -1758,7 +1758,7 @@ dnl ============================================================================ dnl Configure pthreads. if test "x$abi" != "xpecoff" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ], [ ]) AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. @@ -1776,7 +1776,7 @@ dnl Check if we have dlsym support. [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), [have_dlsym="0"]) if test "x$have_dlsym" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ], [ ]) fi else have_dlsym="0" @@ -1788,7 +1788,7 @@ dnl Check if we have dlsym support. pthread_atfork((void *)0, (void *)0, (void *)0); ], [je_cv_pthread_atfork]) if test "x${je_cv_pthread_atfork}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ], [ ]) fi dnl Check if pthread_setname_np is available with the expected API. JE_COMPILABLE([pthread_setname_np(3)], [ @@ -1797,7 +1797,7 @@ dnl Check if we have dlsym support. pthread_setname_np(pthread_self(), "setname_test"); ], [je_cv_pthread_setname_np]) if test "x${je_cv_pthread_setname_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ], [ ]) fi dnl Check if pthread_getname_np is not necessarily present despite dnl the pthread_setname_np counterpart @@ -1812,7 +1812,7 @@ dnl Check if we have dlsym support. 
} ], [je_cv_pthread_getname_np]) if test "x${je_cv_pthread_getname_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ]) fi dnl Check if pthread_get_name_np is not necessarily present despite dnl the pthread_set_name_np counterpart @@ -1828,7 +1828,7 @@ dnl Check if we have dlsym support. } ], [je_cv_pthread_get_name_np]) if test "x${je_cv_pthread_get_name_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ], [ ]) fi fi @@ -1860,7 +1860,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); ], [je_cv_clock_monotonic_coarse]) if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE], [ ], [ ]) fi dnl check for CLOCK_MONOTONIC. @@ -1876,7 +1876,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ #endif ], [je_cv_clock_monotonic]) if test "x${je_cv_clock_monotonic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC], [ ], [ ]) fi dnl Check for mach_absolute_time(). @@ -1886,7 +1886,7 @@ JE_COMPILABLE([mach_absolute_time()], [ mach_absolute_time(); ], [je_cv_mach_absolute_time]) if test "x${je_cv_mach_absolute_time}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME], [ ], [ ]) fi dnl check for CLOCK_REALTIME (always should be available on Linux) @@ -1898,7 +1898,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_REALTIME, ...)], [ clock_gettime(CLOCK_REALTIME, &ts); ], [je_cv_clock_realtime]) if test "x${je_cv_clock_realtime}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ]) fi dnl Use syscall(2) (if available) by default. @@ -1926,7 +1926,7 @@ if test "x$enable_syscall" = "x1" ; then [je_cv_syscall]) JE_CFLAGS_RESTORE() if test "x$je_cv_syscall" = "xyes" ; then - AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) + AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ], [ ]) fi fi @@ -1936,7 +1936,7 @@ AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="0"] ) if test "x$have_secure_getenv" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ], [ ]) fi dnl Check if the GNU-specific sched_getcpu function exists. @@ -1945,7 +1945,7 @@ AC_CHECK_FUNC([sched_getcpu], [have_sched_getcpu="0"] ) if test "x$have_sched_getcpu" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ], [ ]) fi dnl Check if the GNU-specific sched_setaffinity function exists. @@ -1954,7 +1954,7 @@ AC_CHECK_FUNC([sched_setaffinity], [have_sched_setaffinity="0"] ) if test "x$have_sched_setaffinity" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ]) fi dnl Check if the Solaris/BSD issetugid function exists. @@ -1963,7 +1963,7 @@ AC_CHECK_FUNC([issetugid], [have_issetugid="0"] ) if test "x$have_issetugid" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ], [ ]) fi dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. 
If so, use @@ -1975,7 +1975,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup], [have__malloc_thread_cleanup="0"] ) if test "x$have__malloc_thread_cleanup" = "x1" ; then - AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) + AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ], [ ]) wrap_syms="${wrap_syms} _malloc_thread_cleanup" force_tls="1" fi @@ -1988,7 +1988,7 @@ AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], [have__pthread_mutex_init_calloc_cb="0"] ) if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then - AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) + AC_DEFINE([JEMALLOC_MUTEX_INIT_CB], [ ], [ ]) wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork" fi @@ -1997,7 +1997,7 @@ AC_CHECK_FUNC([memcntl], [have_memcntl="0"], ) if test "x$have_memcntl" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ]) fi dnl Disable lazy locking by default. @@ -2026,7 +2026,7 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$have_dlsym" = "x1" ; then - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) + AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ], [ ]) else AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.]) fi @@ -2059,7 +2059,7 @@ else fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ], [ ]) fi dnl ============================================================================ @@ -2080,7 +2080,7 @@ JE_COMPILABLE([C11 atomics], [ return r == 0; ], [je_cv_c11_atomics]) if test "x${je_cv_c11_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_C11_ATOMICS]) + AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ]) fi dnl ============================================================================ @@ -2095,7 +2095,7 @@ JE_COMPILABLE([GCC __atomic atomics], [ return after_add == 1; ], [je_cv_gcc_atomic_atomics]) if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS], [ ], [ ]) dnl check for 8-bit atomic support JE_COMPILABLE([GCC 8-bit __atomic atomics], [ @@ -2107,7 +2107,7 @@ if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then return after_add == 1; ], [je_cv_gcc_u8_atomic_atomics]) if test "x${je_cv_gcc_u8_atomic_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS], [ ], [ ]) fi fi @@ -2122,7 +2122,7 @@ JE_COMPILABLE([GCC __sync atomics], [ return (before_add == 0) && (after_add == 1); ], [je_cv_gcc_sync_atomics]) if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS], [ ], [ ]) dnl check for 8-bit atomic support JE_COMPILABLE([GCC 8-bit __sync atomics], [ @@ -2133,7 +2133,7 @@ if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then return (before_add == 0) && (after_add == 1); ], [je_cv_gcc_u8_sync_atomics]) if test "x${je_cv_gcc_u8_sync_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS], [ ], [ ]) fi fi @@ -2158,7 +2158,7 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ } ], [je_cv_osatomic]) if test "x${je_cv_osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) + AC_DEFINE([JEMALLOC_OSATOMIC], [ ], [ ]) fi dnl ============================================================================ @@ -2170,7 +2170,7 @@ JE_COMPILABLE([madvise(2)], [ madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then - 
AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ], [ ]) dnl Check for madvise(..., MADV_FREE). JE_COMPILABLE([madvise(..., MADV_FREE)], [ @@ -2179,12 +2179,12 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_FREE); ], [je_cv_madv_free]) if test "x${je_cv_madv_free}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ]) elif test "x${je_cv_madvise}" = "xyes" ; then case "${host_cpu}" in i686|x86_64) case "${host}" in *-*-linux*) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ]) + AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ], [ ]) ;; esac ;; @@ -2198,7 +2198,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_DONTNEED); ], [je_cv_madv_dontneed]) if test "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ], [ ]) fi dnl Check for madvise(..., MADV_DO[NT]DUMP). @@ -2209,7 +2209,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_DODUMP); ], [je_cv_madv_dontdump]) if test "x${je_cv_madv_dontdump}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ]) + AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ], [ ]) fi dnl Check for madvise(..., MADV_[NO]HUGEPAGE). @@ -2227,14 +2227,14 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_CORE); ], [je_cv_madv_nocore]) if test "x${je_cv_madv_nocore}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ]) + AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi case "${host_cpu}" in arm*) ;; *) if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) fi ;; esac @@ -2246,7 +2246,7 @@ else posix_madvise((void *)0, 0, 0); ], [je_cv_posix_madvise]) if test "x${je_cv_posix_madvise}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ], [ ]) dnl Check for posix_madvise(..., POSIX_MADV_DONTNEED). 
JE_COMPILABLE([posix_madvise(..., POSIX_MADV_DONTNEED)], [ @@ -2255,7 +2255,7 @@ else posix_madvise((void *)0, 0, POSIX_MADV_DONTNEED); ], [je_cv_posix_madv_dontneed]) if test "x${je_cv_posix_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ], [ ]) fi fi fi @@ -2269,7 +2269,7 @@ JE_COMPILABLE([mprotect(2)], [ mprotect((void *)0, 0, PROT_NONE); ], [je_cv_mprotect]) if test "x${je_cv_mprotect}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ], [ ]) fi dnl ============================================================================ @@ -2296,7 +2296,7 @@ AC_CACHE_CHECK([for __builtin_clz], [je_cv_builtin_clz=no])]) if test "x${je_cv_builtin_clz}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) + AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ], [ ]) fi dnl ============================================================================ @@ -2315,7 +2315,7 @@ JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #endif ], [je_cv_os_unfair_lock]) if test "x${je_cv_os_unfair_lock}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ], [ ]) fi dnl ============================================================================ @@ -2341,7 +2341,7 @@ if test "x${enable_zone_allocator}" = "x1" ; then if test "x${abi}" != "xmacho"; then AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) fi - AC_DEFINE([JEMALLOC_ZONE], [ ]) + AC_DEFINE([JEMALLOC_ZONE], [ ], [ ]) fi dnl ============================================================================ @@ -2362,16 +2362,17 @@ AC_SUBST([enable_initial_exec_tls]) if test "x${je_cv_tls_model}" = "xyes" -a \ "x${enable_initial_exec_tls}" = "x1" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))]) + [__attribute__((tls_model("initial-exec")))], + [ ]) else - AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) + AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) fi dnl ============================================================================ dnl Enable background threads if possible. 
if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then - AC_DEFINE([JEMALLOC_BACKGROUND_THREAD]) + AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi dnl ============================================================================ @@ -2392,7 +2393,7 @@ if test "x$glibc" = "x1" ; then ], [je_cv_glibc_malloc_hook]) if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ], [ ]) wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" fi fi @@ -2407,7 +2408,7 @@ if test "x$glibc" = "x1" ; then ], [je_cv_glibc_memalign_hook]) if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ], [ ]) wrap_syms="${wrap_syms} __memalign_hook" fi fi @@ -2422,7 +2423,7 @@ JE_COMPILABLE([pthreads adaptive mutexes], [ pthread_mutexattr_destroy(&attr); ], [je_cv_pthread_mutex_adaptive_np]) if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ]) fi JE_CFLAGS_SAVE() @@ -2441,7 +2442,7 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [ ], [je_cv_strerror_r_returns_char_with_gnu_source]) JE_CFLAGS_RESTORE() if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then - AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ]) + AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ]) fi dnl ============================================================================ -- cgit v0.12 From bd70d8fc0f35fc7883fad18216d09e613867314b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Nov 2021 15:23:47 -0800 Subject: Add the profiling settings for tests explicit. Many profiling related tests make assumptions on the profiling settings, e.g. opt_prof is off by default, and prof_active is default on when opt_prof is on. However the default settings can be changed via --with-malloc-conf at build time. Fixing the tests by adding the assumed settings explicitly. 
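To make the pattern concrete, each affected test gains (or extends) a
test/unit/*.sh wrapper that pins every profiling setting it relies on,
rather than inheriting whatever default --with-malloc-conf baked in. A
minimal sketch, mirroring the wrappers touched below:

    #!/bin/sh
    # Pin the profiling assumptions of the test, so that a default
    # injected at build time cannot silently change them.
    if [ "x${enable_prof}" = "x1" ] ; then
      export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0"
    fi

Tests whose assumptions cannot be expressed this way instead skip
themselves explicitly, e.g. via test_skip_if(opt_prof).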
--- test/unit/hpa_background_thread.c | 4 ++-- test/unit/inspect.sh | 5 +++++ test/unit/mallctl.c | 6 +++++- test/unit/prof_active.sh | 2 +- test/unit/prof_hook.sh | 2 +- test/unit/prof_log.sh | 2 +- test/unit/prof_recent.sh | 2 +- test/unit/prof_stats.sh | 2 +- test/unit/prof_sys_thread_name.sh | 2 +- test/unit/prof_tctx.sh | 2 +- test/unit/safety_check.sh | 2 +- test/unit/size_check.sh | 5 +++++ test/unit/tcache_max.c | 1 + 13 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 test/unit/inspect.sh create mode 100644 test/unit/size_check.sh diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 5976bb4..228b771 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -104,8 +104,8 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); if (expect_deferred) { - expect_true(empty_ndirty == 0 || empty_ndirty == 1, - "Unexpected extra dirty page count: %zu", + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || + opt_prof, "Unexpected extra dirty page count: %zu", empty_ndirty); } else { assert_zu_eq(0, empty_ndirty, diff --git a/test/unit/inspect.sh b/test/unit/inspect.sh new file mode 100644 index 0000000..352d110 --- /dev/null +++ b/test/unit/inspect.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:false" +fi diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5cba083..81a36c9 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -368,7 +368,10 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. */ void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); - expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); + if (!opt_prof) { + expect_ptr_eq(p0, p1, + "Expected tcache to allocate cached region"); + } /* Clean up. */ dallocx(p1, MALLOCX_TCACHE_NONE); @@ -904,6 +907,7 @@ TEST_BEGIN(test_prof_active) { * test_mallctl_opt was already enough. 
*/ test_skip_if(!config_prof); + test_skip_if(opt_prof); bool active, old; size_t len = sizeof(bool); diff --git a/test/unit/prof_active.sh b/test/unit/prof_active.sh index 0167cb1..9749674 100644 --- a/test/unit/prof_active.sh +++ b/test/unit/prof_active.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_thread_active_init:false,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,prof_thread_active_init:false,lg_prof_sample:0" fi diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh index d14cb8c..c7ebd8f 100644 --- a/test/unit/prof_hook.sh +++ b/test/unit/prof_hook.sh @@ -1,6 +1,6 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_log.sh b/test/unit/prof_log.sh index 8fcc7d8..485f9bf 100644 --- a/test/unit/prof_log.sh +++ b/test/unit/prof_log.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh index 59759a6..58a54a4 100644 --- a/test/unit/prof_recent.sh +++ b/test/unit/prof_recent.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_recent_alloc_max:3" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3" fi diff --git a/test/unit/prof_stats.sh b/test/unit/prof_stats.sh index b01dfd4..f3c819b 100644 --- a/test/unit/prof_stats.sh +++ b/test/unit/prof_stats.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_stats:true" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_stats:true" fi diff --git a/test/unit/prof_sys_thread_name.sh b/test/unit/prof_sys_thread_name.sh index 281cf9a..1f02a8a 100644 --- a/test/unit/prof_sys_thread_name.sh +++ b/test/unit/prof_sys_thread_name.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_sys_thread_name:true" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_sys_thread_name:true" fi diff --git a/test/unit/prof_tctx.sh b/test/unit/prof_tctx.sh index 8fcc7d8..485f9bf 100644 --- a/test/unit/prof_tctx.sh +++ b/test/unit/prof_tctx.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/safety_check.sh b/test/unit/safety_check.sh index 8fcc7d8..485f9bf 100644 --- a/test/unit/safety_check.sh +++ b/test/unit/safety_check.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/size_check.sh b/test/unit/size_check.sh new file mode 100644 index 0000000..352d110 --- /dev/null +++ b/test/unit/size_check.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:false" +fi diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 0594cef..4f207e0 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -151,6 +151,7 @@ test_tcache_max_impl(void) { TEST_BEGIN(test_tcache_max) { test_skip_if(!config_stats); 
test_skip_if(!opt_tcache); + test_skip_if(opt_prof); for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; -- cgit v0.12 From d038160f3b76ac1e5203e11008169366629c81cd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 22 Dec 2021 17:24:58 -0800 Subject: Fix shadowed variable usage. Verified with EXTRA_CFLAGS=-Wshadow. --- .../internal/jemalloc_internal_inlines_b.h | 1 - include/jemalloc/internal/nstime.h | 6 ++-- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines.h | 4 +-- src/bin_info.c | 4 +-- src/ckh.c | 6 ++-- src/ctl.c | 6 ++-- src/extent.c | 5 ++-- src/jemalloc.c | 32 +++++++++++----------- src/pa.c | 7 +++-- src/pac.c | 8 +++--- src/prof.c | 10 +++---- src/prof_data.c | 8 +++--- src/prof_sys.c | 10 +++---- src/stats.c | 14 +++++----- test/analyze/prof_bias.c | 6 ++-- test/src/test.c | 4 +-- test/unit/arena_reset.c | 6 ++-- test/unit/atomic.c | 2 +- test/unit/batch_alloc.c | 23 ++++++++-------- test/unit/pa.c | 4 +-- test/unit/prof_idump.c | 6 ++-- test/unit/prof_recent.c | 2 +- test/unit/prof_reset.c | 31 ++++++++++----------- test/unit/rb.c | 2 +- test/unit/retained.c | 30 ++++++++++---------- 26 files changed, 119 insertions(+), 120 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 1de349e..35d71d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -20,7 +20,6 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_t *tcache = tsd_tcachep_get(tsd); tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, newarena); } diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 258b16e..486e5cc 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -18,7 +18,7 @@ typedef struct { #endif } nstime_t; -static const nstime_t zero = NSTIME_ZERO_INITIALIZER; +static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); @@ -60,12 +60,12 @@ extern const char *prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { - nstime_copy(time, &zero); + nstime_copy(time, &nstime_zero); } JEMALLOC_ALWAYS_INLINE bool nstime_equals_zero(nstime_t *time) { - int diff = nstime_compare(time, &zero); + int diff = nstime_compare(time, &nstime_zero); assert(diff >= 0); return diff == 0; } diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 75dd90b..953192f 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -32,7 +32,7 @@ extern bool opt_prof_sys_thread_name; extern bool opt_prof_stats; /* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; +extern bool prof_active_state; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ extern bool prof_gdump_val; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 7884e9a..a8e7e7f 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -12,7 +12,7 @@ prof_active_assert() { * If opt_prof is off, then prof_active must always be off, regardless * of whether prof_active_mtx is in effect or not. 
*/ - assert(opt_prof || !prof_active); + assert(opt_prof || !prof_active_state); } JEMALLOC_ALWAYS_INLINE bool @@ -24,7 +24,7 @@ prof_active_get_unlocked(void) { * prof_active in the fast path, so there are no guarantees regarding * how long it will take for all threads to notice state changes. */ - return prof_active; + return prof_active_state; } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/bin_info.c b/src/bin_info.c index 20b93ea..8629ef8 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -7,9 +7,9 @@ bin_info_t bin_infos[SC_NBINS]; static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bin_info_t bin_infos[SC_NBINS]) { + bin_info_t infos[SC_NBINS]) { for (unsigned i = 0; i < SC_NBINS; i++) { - bin_info_t *bin_info = &bin_infos[i]; + bin_info_t *bin_info = &infos[i]; sc_t *sc = &sc_data->sc[i]; bin_info->reg_size = ((size_t)1U << sc->lg_base) + ((size_t)sc->ndelta << sc->lg_delta); diff --git a/src/ckh.c b/src/ckh.c index 9441fba..8db4319 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -356,14 +356,14 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { } bool -ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, ckh_keycomp_t *keycomp) { bool ret; size_t mincells, usize; unsigned lg_mincells; assert(minitems > 0); - assert(hash != NULL); + assert(ckh_hash != NULL); assert(keycomp != NULL); #ifdef CKH_COUNT @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, } ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->hash = hash; + ckh->hash = ckh_hash; ckh->keycomp = keycomp; usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); diff --git a/src/ctl.c b/src/ctl.c index eccb958..81ab147 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3622,9 +3622,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); + for (szind_t j = 0; j < SC_NBINS; j++) { + for (unsigned k = 0; k < bin_infos[j].n_shards; k++) { + bin_t *bin = arena_get_bin(arena, j, k); MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/extent.c b/src/extent.c index 4bbbff3..1c6fa1f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -748,9 +748,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); + ehooks_zero(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata)); } return edata; label_err: diff --git a/src/jemalloc.c b/src/jemalloc.c index 1893657..d105dff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1212,12 +1212,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { - int i; + int m; bool match = false; - for (i = 0; i < metadata_thp_mode_limit; i++) { - if (strncmp(metadata_thp_mode_names[i], + for (m = 0; m < metadata_thp_mode_limit; m++) { + if (strncmp(metadata_thp_mode_names[m], v, vlen) == 0) { - opt_metadata_thp = i; + opt_metadata_thp = m; match = true; break; } @@ -1230,18 +1230,18 @@ malloc_conf_init_helper(sc_data_t 
*sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { - int i; + int m; bool match = false; - for (i = 0; i < dss_prec_limit; i++) { - if (strncmp(dss_prec_names[i], v, vlen) + for (m = 0; m < dss_prec_limit; m++) { + if (strncmp(dss_prec_names[m], v, vlen) == 0) { - if (extent_dss_prec_set(i)) { + if (extent_dss_prec_set(m)) { CONF_ERROR( "Error setting dss", k, klen, v, vlen); } else { opt_dss = - dss_prec_names[i]; + dss_prec_names[m]; match = true; break; } @@ -1428,16 +1428,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; - for (int i = percpu_arena_mode_names_base; i < - percpu_arena_mode_names_limit; i++) { - if (strncmp(percpu_arena_mode_names[i], + for (int m = percpu_arena_mode_names_base; m < + percpu_arena_mode_names_limit; m++) { + if (strncmp(percpu_arena_mode_names[m], v, vlen) == 0) { if (!have_percpu_arena) { CONF_ERROR( "No getcpu support", k, klen, v, vlen); } - opt_percpu_arena = i; + opt_percpu_arena = m; match = true; break; } @@ -1622,15 +1622,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (CONF_MATCH("thp")) { bool match = false; - for (int i = 0; i < thp_mode_names_limit; i++) { - if (strncmp(thp_mode_names[i],v, vlen) + for (int m = 0; m < thp_mode_names_limit; m++) { + if (strncmp(thp_mode_names[m],v, vlen) == 0) { if (!have_madvise_huge && !have_memcntl) { CONF_ERROR( "No THP support", k, klen, v, vlen); } - opt_thp = i; + opt_thp = m; match = true; break; } diff --git a/src/pa.c b/src/pa.c index 0f95e93..eb7e462 100644 --- a/src/pa.c +++ b/src/pa.c @@ -31,8 +31,9 @@ pa_central_init(pa_central_t *central, base_t *base, bool hpa, bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, - malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + malloc_mutex_t *stats_mtx, nstime_t *cur_time, + size_t pac_oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -40,7 +41,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms, + cur_time, pac_oversize_threshold, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } diff --git a/src/pac.c b/src/pac.c index c6d9f14..53e3d82 100644 --- a/src/pac.c +++ b/src/pac.c @@ -36,9 +36,9 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, - malloc_mutex_t *stats_mtx) { + edata_cache_t *edata_cache, nstime_t *cur_time, + size_t pac_oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on @@ -73,7 +73,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } - atomic_store_zu(&pac->oversize_threshold, oversize_threshold, + atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold, ATOMIC_RELAXED); if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; diff --git a/src/prof.c b/src/prof.c index 625bcd7..f708d10 100644 --- a/src/prof.c +++ b/src/prof.c @@ -43,7 +43,7 @@ static counter_accum_t prof_idump_accumulated; * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). */ -bool prof_active; +bool prof_active_state; static malloc_mutex_t prof_active_mtx; /* @@ -416,7 +416,7 @@ prof_active_get(tsdn_t *tsdn) { prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); - prof_active_current = prof_active; + prof_active_current = prof_active_state; malloc_mutex_unlock(tsdn, &prof_active_mtx); return prof_active_current; } @@ -427,8 +427,8 @@ prof_active_set(tsdn_t *tsdn, bool active) { prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); - prof_active_old = prof_active; - prof_active = active; + prof_active_old = prof_active_state; + prof_active_state = active; malloc_mutex_unlock(tsdn, &prof_active_mtx); prof_active_assert(); return prof_active_old; @@ -629,7 +629,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (opt_prof) { lg_prof_sample = opt_lg_prof_sample; prof_unbias_map_init(); - prof_active = opt_prof_active; + prof_active_state = opt_prof_active; prof_gdump_val = opt_prof_gdump; prof_thread_active_init = opt_prof_thread_active_init; diff --git a/src/prof_data.c b/src/prof_data.c index 6334985..3ef0100 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -397,7 +397,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { /* Used in unit tests. 
*/ static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { size_t *tdata_count = (size_t *)arg; @@ -895,7 +895,7 @@ struct prof_tdata_merge_iter_arg_s { }; static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { prof_tdata_merge_iter_arg_t *arg = (prof_tdata_merge_iter_arg_t *)opaque; @@ -939,7 +939,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { if (!tdata->dumping) { return NULL; @@ -1278,7 +1278,7 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { } static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; diff --git a/src/prof_sys.c b/src/prof_sys.c index b7a3a2c..b5f1f5b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -561,18 +561,18 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_prefix_get(tsd_tsdn(tsd)); + const char *prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prof_prefix, - prof_getpid(), prof_dump_seq, v, vseq); + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), + prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prof_prefix, - prof_getpid(), prof_dump_seq, v); + "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), + prof_dump_seq, v); } prof_dump_seq++; } diff --git a/src/stats.c b/src/stats.c index 7af5782..b1b3906 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1606,15 +1606,15 @@ stats_general_print(emitter_t *emitter) { "Maximum thread-cached size class", emitter_type_size, &sv); } - unsigned nbins; - CTL_GET("arenas.nbins", &nbins, unsigned); + unsigned arenas_nbins; + CTL_GET("arenas.nbins", &arenas_nbins, unsigned); emitter_kv(emitter, "nbins", "Number of bin size classes", - emitter_type_unsigned, &nbins); + emitter_type_unsigned, &arenas_nbins); - unsigned nhbins; - CTL_GET("arenas.nhbins", &nhbins, unsigned); + unsigned arenas_nhbins; + CTL_GET("arenas.nhbins", &arenas_nhbins, unsigned); emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", - emitter_type_unsigned, &nhbins); + emitter_type_unsigned, &arenas_nhbins); /* * We do enough mallctls in a loop that we actually want to omit them @@ -1624,7 +1624,7 @@ stats_general_print(emitter_t *emitter) { emitter_json_array_kv_begin(emitter, "bin"); size_t arenas_bin_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); - for (unsigned i = 0; i < nbins; i++) { + for (unsigned i = 0; i < arenas_nbins; i++) { arenas_bin_mib[2] = i; emitter_json_object_begin(emitter); diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index 4b960a6..a96ca94 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -45,9 +45,9 @@ do_allocs(size_t sz, size_t cnt, bool do_frees) { int main(void) { - size_t lg_prof_sample = 19; - int 
err = mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, - sizeof(lg_prof_sample)); + size_t lg_prof_sample_local = 19; + int err = mallctl("prof.reset", NULL, NULL, + (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); assert(err == 0); prof_backtrace_hook_set(mock_backtrace); diff --git a/test/src/test.c b/test/src/test.c index f97ce4d..4cd803e 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -87,8 +87,8 @@ test_fail(const char *format, ...) { } static const char * -test_status_string(test_status_t test_status) { - switch (test_status) { +test_status_string(test_status_t current_status) { + switch (current_status) { case test_status_pass: return "pass"; case test_status_skip: return "skip"; case test_status_fail: return "fail"; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 589689c..8ef0786 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -258,12 +258,12 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { /* Try arena.create with custom hooks. */ size_t sz = sizeof(extent_hooks_t *); - extent_hooks_t *default_hooks; - expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, + extent_hooks_t *a0_default_hooks; + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&a0_default_hooks, &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); /* Default impl; but wrapped as "customized". */ - extent_hooks_t new_hooks = *default_hooks; + extent_hooks_t new_hooks = *a0_default_hooks; extent_hooks_t *hook = &new_hooks; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 1326a11..c2ec8c7 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -45,7 +45,7 @@ */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ success = false; \ - for (int i = 0; i < 10 && !success; i++) { \ + for (int retry = 0; retry < 10 && !success; retry++) { \ expected = val2; \ success = atomic_compare_exchange_weak_##ta(&atom, \ &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 992990f..901c52b 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" #define BATCH_MAX ((1U << 16) + 1024) -static void *ptrs[BATCH_MAX]; +static void *global_ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) @@ -122,13 +122,14 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - size_t filled = batch_alloc_wrapper(ptrs, batch, size, - flags); + size_t filled = batch_alloc_wrapper(global_ptrs, batch, + size, flags); assert_zu_eq(filled, batch, ""); - verify_batch_basic(tsd, ptrs, batch, usize, zero); - verify_batch_locality(tsd, ptrs, batch, usize, arena, - nregs); - release_batch(ptrs, batch, usize); + verify_batch_basic(tsd, global_ptrs, batch, usize, + zero); + verify_batch_locality(tsd, global_ptrs, batch, usize, + arena, nregs); + release_batch(global_ptrs, batch, usize); } } @@ -163,16 +164,16 @@ TEST_BEGIN(test_batch_alloc_large) { size_t size = SC_LARGE_MINCLASS; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); - size_t filled = batch_alloc(ptrs, batch, size, 0); + size_t filled = batch_alloc(global_ptrs, batch, size, 0); assert_zu_eq(filled, batch, ""); - release_batch(ptrs, batch, size); + release_batch(global_ptrs, batch, size); } size = tcache_maxclass + 1; for (size_t batch = 0; batch < 4; 
++batch) { assert(batch < BATCH_MAX); - size_t filled = batch_alloc(ptrs, batch, size, 0); + size_t filled = batch_alloc(global_ptrs, batch, size, 0); assert_zu_eq(filled, batch, ""); - release_batch(ptrs, batch, size); + release_batch(global_ptrs, batch, size); } } TEST_END diff --git a/test/unit/pa.c b/test/unit/pa.c index 505b6fa..b1e2f6e 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -69,10 +69,10 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { &hpa_hooks_default); assert_false(err, ""); - const size_t oversize_threshold = 8 * 1024 * 1024; + const size_t pa_oversize_threshold = 8 * 1024 * 1024; err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, - &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, + &test_data->stats_mtx, &time, pa_oversize_threshold, dirty_decay_ms, muzzy_decay_ms); assert_false(err, ""); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index e9f5e56..455ac52 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -26,14 +26,14 @@ TEST_BEGIN(test_idump) { bool active; void *p; - const char *prefix = TEST_PREFIX; + const char *test_prefix = TEST_PREFIX; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&prefix, - sizeof(prefix)), 0, + expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&test_prefix, + sizeof(test_prefix)), 0, "Unexpected mallctl failure while overwriting dump prefix"); expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 9974d10..c23b01e 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -15,7 +15,7 @@ confirm_prof_setup() { "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - assert_true(prof_active, "prof_active not on"); + assert_true(prof_active_state, "prof_active not on"); assert_zd_eq(prof_recent_alloc_max_ctl_read(), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index a0fb038..9b33b20 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -21,26 +21,25 @@ set_prof_active(bool active) { static size_t get_lg_prof_sample(void) { - size_t lg_prof_sample; + size_t ret; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, - NULL, 0), 0, + expect_d_eq(mallctl("prof.lg_sample", (void *)&ret, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); - return lg_prof_sample; + return ret; } static void -do_prof_reset(size_t lg_prof_sample) { +do_prof_reset(size_t lg_prof_sample_input) { expect_d_eq(mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample, sizeof(size_t)), 0, + (void *)&lg_prof_sample_input, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); - expect_zu_eq(lg_prof_sample, get_lg_prof_sample(), + expect_zu_eq(lg_prof_sample_input, get_lg_prof_sample(), "Expected profile sample rate change"); } TEST_BEGIN(test_prof_reset_basic) { - size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next; + size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; size_t sz; unsigned i; @@ -52,8 +51,8 @@ TEST_BEGIN(test_prof_reset_basic) { "Unexpected mallctl failure while reading profiling sample rate"); expect_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); - lg_prof_sample = get_lg_prof_sample(); - 
expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); @@ -61,8 +60,8 @@ TEST_BEGIN(test_prof_reset_basic) { for (i = 0; i < 2; i++) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure while resetting profile data"); - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected profile sample rate change"); } @@ -70,15 +69,15 @@ TEST_BEGIN(test_prof_reset_basic) { lg_prof_sample_next = 1; for (i = 0; i < 2; i++) { do_prof_reset(lg_prof_sample_next); - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample, lg_prof_sample_next, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_cur, lg_prof_sample_next, "Expected profile sample rate change"); lg_prof_sample_next = lg_prof_sample_orig; } /* Make sure the test code restored prof.lg_sample. */ - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 7d4c454..827ec51 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -964,7 +964,7 @@ do_update_search_test(int nnodes, int ntrees, int nremovals, tree_insert(&tree, &nodes[j]); } } - for (int i = 0; i < nupdates; i++) { + for (int j = 0; j < nupdates; j++) { uint32_t ind = gen_rand32_range(sfmt, nnodes); nodes[ind].specialness = 1 - nodes[ind].specialness; tree_update_summaries(&tree, &nodes[ind]); diff --git a/test/unit/retained.c b/test/unit/retained.c index 53c90f2..37ff88f 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -13,43 +13,43 @@ static atomic_u_t nfinished; static unsigned do_arena_create(extent_hooks_t *h) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + unsigned new_arena_ind; + size_t ind_sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); - return arena_ind; + return new_arena_ind; } static void -do_arena_destroy(unsigned arena_ind) { +do_arena_destroy(unsigned ind) { size_t mib[3]; size_t miblen; miblen = sizeof(mib)/sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; + mib[1] = (size_t)ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } static void do_refresh(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + uint64_t refresh_epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&refresh_epoch, + sizeof(refresh_epoch)), 0, "Unexpected mallctl() failure"); } static size_t -do_get_size_impl(const char *cmd, unsigned arena_ind) { +do_get_size_impl(const char *cmd, unsigned ind) { size_t mib[4]; size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); - mib[2] = arena_ind; + mib[2] = ind; size_t size; expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); @@ -58,13 +58,13 @@ do_get_size_impl(const char *cmd, unsigned arena_ind) { } static size_t -do_get_active(unsigned arena_ind) { - return do_get_size_impl("stats.arenas.0.pactive", arena_ind) * PAGE; +do_get_active(unsigned ind) { + return do_get_size_impl("stats.arenas.0.pactive", ind) * PAGE; } static size_t -do_get_mapped(unsigned arena_ind) { - return do_get_size_impl("stats.arenas.0.mapped", arena_ind); +do_get_mapped(unsigned ind) { + return do_get_size_impl("stats.arenas.0.mapped", ind); } static void * -- cgit v0.12 From 06aac61c4b261e5d1c8dcf3c7dd7921e9e395d62 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Nov 2021 15:45:24 -0800 Subject: Split the core logic of tcache flush into a separate function. The core function takes a ptr array as input (containing items to be flushed), which will be reused to flush sanitizer-stashed items. --- src/tcache.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 39a4ea6..5c3d5b1 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -300,7 +300,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, unsigned rem, bool small) { + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -313,24 +313,15 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[binind]); - assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned nflush = ncached - rem; /* * Variable length array must have > 0 length; the last element is never * touched (it's just included to satisfy the no-zero-length rule). 
*/ VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - - cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], - &ptrs, nflush); - - tcache_bin_flush_edatas_lookup(tsd, &ptrs, binind, nflush, item_edata); + tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); /* * The slabs where we freed the last remaining object in the slab (and @@ -407,7 +398,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs.ptr[i]; + void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); @@ -429,7 +420,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs.ptr[i]; + void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -440,7 +431,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * arena. Either way, stash the object so that * it can be handled in a future pass. */ - ptrs.ptr[ndeferred] = ptr; + ptrs->ptr[ndeferred] = ptr; item_edata[ndeferred].edata = edata; ndeferred++; continue; @@ -501,6 +492,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, unsigned rem, bool small) { + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, + &tcache_bin_info[binind]); + assert((cache_bin_sz_t)rem <= ncached); + unsigned nflush = ncached - rem; + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); + cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], + &ptrs, nflush); + + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, + small); + cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, ncached - rem); } @@ -508,13 +516,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true); } void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); } void -- cgit v0.12 From b75822bc6e5cbbf463c611d8dea32857f8de9d3e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Oct 2021 17:33:15 -0700 Subject: Implement use-after-free detection using junk and stash. On deallocation, sampled pointers (specially aligned) get junked and stashed into tcache (to prevent immediate reuse). The expected behavior is to have read-after-free corrupted and stopped by the junk-filling, while write-after-free is checked when flushing the stashed pointers. 
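
For illustration only, the following is a minimal, self-contained sketch of the junk-and-stash mechanism described above; it is not the jemalloc implementation. The names toy_free(), toy_flush_stash(), TOY_JUNK and TOY_STASH_MAX are hypothetical, and the junk byte simply mirrors the 0x5b pattern used by uaf_detect_junk in this patch. In the real code the stash lives inside the tcache bin (cache_bin_stash()) and the check happens in san_check_stashed_ptrs() when the stashed pointers are flushed.

    /*
     * Toy model of sampled junk-and-stash on free (assumed names; not the
     * jemalloc API). Freed memory is junk-filled so reads-after-free return
     * garbage, and the pointer is stashed instead of being reused; when the
     * stash is flushed, any deviation from the junk pattern is reported as a
     * write-after-free.
     */
    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define TOY_JUNK      0x5b  /* same byte pattern as uaf_detect_junk */
    #define TOY_STASH_MAX 8

    typedef struct { void *ptr; size_t size; } toy_stash_slot_t;
    static toy_stash_slot_t toy_stash[TOY_STASH_MAX];
    static size_t toy_nstashed = 0;

    /* Junk-fill on free and defer reuse by stashing the pointer. */
    static void
    toy_free(void *ptr, size_t size) {
        memset(ptr, TOY_JUNK, size);        /* read-after-free sees junk */
        if (toy_nstashed < TOY_STASH_MAX) {
            toy_stash[toy_nstashed].ptr = ptr;
            toy_stash[toy_nstashed].size = size;
            toy_nstashed++;
            return;                         /* reuse is delayed */
        }
        free(ptr);                          /* stash full: release as usual */
    }

    /* On flush, verify the junk pattern; any change means write-after-free. */
    static void
    toy_flush_stash(void) {
        for (size_t i = 0; i < toy_nstashed; i++) {
            unsigned char *p = toy_stash[i].ptr;
            for (size_t j = 0; j < toy_stash[i].size; j++) {
                if (p[j] != TOY_JUNK) {
                    fprintf(stderr, "write-after-free at %p+%zu\n",
                        (void *)p, j);
                    abort();
                }
            }
            free(p);
        }
        toy_nstashed = 0;
    }

    int
    main(void) {
        char *buf = malloc(32);
        assert(buf != NULL);
        toy_free(buf, 32);
        /* buf[0] = 'x';  <- a write here would be caught by the flush. */
        toy_flush_stash();
        return 0;
    }

The sketch scans the whole allocation for simplicity; the actual patch keeps the deallocation fast path cheap by junking and checking only the first, middle and last pointer-sized words (san_junk_ptr()/san_junk_ptr_locations()), falling back to a full memset only in debug-like configurations.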
--- Makefile.in | 1 + configure.ac | 17 ++ include/jemalloc/internal/cache_bin.h | 101 ++++++++- .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_internal_externs.h | 3 + include/jemalloc/internal/jemalloc_preamble.h.in | 8 + include/jemalloc/internal/san.h | 68 +++++++ include/jemalloc/internal/tcache_externs.h | 24 ++- include/jemalloc/internal/tcache_inlines.h | 20 ++ src/arena.c | 2 + src/cache_bin.c | 2 + src/ctl.c | 4 + src/jemalloc.c | 74 ++++++- src/san.c | 60 ++++++ src/tcache.c | 47 +++++ test/include/test/arena_util.h | 6 + test/unit/cache_bin.c | 149 ++++++++++++-- test/unit/mallctl.c | 15 +- test/unit/tcache_max.c | 1 + test/unit/tcache_max.sh | 2 +- test/unit/uaf.c | 225 +++++++++++++++++++++ test/unit/uaf.sh | 3 + 22 files changed, 793 insertions(+), 42 deletions(-) create mode 100644 test/unit/uaf.c create mode 100644 test/unit/uaf.sh diff --git a/Makefile.in b/Makefile.in index 50c586c..7a820fe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -284,6 +284,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/tsd.c \ + $(srcroot)test/unit/uaf.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ diff --git a/configure.ac b/configure.ac index e18c0cc..49a12ac 100644 --- a/configure.ac +++ b/configure.ac @@ -1564,6 +1564,23 @@ if test "x$enable_opt_size_checks" = "x1" ; then fi AC_SUBST([enable_opt_size_checks]) +dnl Do not check for use-after-free by default. +AC_ARG_ENABLE([uaf-detection], + [AS_HELP_STRING([--enable-uaf-detection], + [Allow sampled junk-filling on deallocation to detect use-after-free])], +[if test "x$enable_uaf_detection" = "xno" ; then + enable_uaf_detection="0" +else + enable_uaf_detection="1" +fi +], +[enable_uaf_detection="0"] +) +if test "x$enable_uaf_detection" = "x1" ; then + AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ]) +fi +AC_SUBST([enable_uaf_detection]) + JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { __builtin_unreachable(); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 41942e9..266897f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -98,7 +98,7 @@ struct cache_bin_s { * when the array is nonempty -- this is in the array). * * Recall that since the stack grows down, this is the lowest address in - * the array. + * the array. Only adjusted when stashing items. */ uint16_t low_bits_full; @@ -107,7 +107,7 @@ struct cache_bin_s { * is empty. * * The stack grows down -- this is one past the highest address in the - * array. + * array. Immutable after initialization. */ uint16_t low_bits_empty; }; @@ -136,6 +136,26 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins = bins; } +JEMALLOC_ALWAYS_INLINE bool +cache_bin_nonfast_aligned(const void *ptr) { + if (!config_uaf_detection) { + return false; + } + /* + * Currently we use alignment to decide which pointer to junk & stash on + * dealloc (for catching use-after-free). In some common cases a + * page-aligned check is needed already (sdalloc w/ config_prof), so we + * are getting it more or less for free -- no added instructions on + * free_fastpath. + * + * Another way of deciding which pointer to sample, is adding another + * thread_event to pick one every N bytes. That also adds no cost on + * the fastpath, however it will tend to pick large allocations which is + * not the desired behavior. 
+ */ + return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0; +} + /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t cache_bin_info_ncached_max(cache_bin_info_t *info) { @@ -233,6 +253,20 @@ cache_bin_empty_position_get(cache_bin_t *bin) { } /* + * Internal. + * + * A pointer to the position with the lowest address of the backing array. + */ +static inline void ** +cache_bin_full_position_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + void **ret = cache_bin_empty_position_get(bin) - ncached_max; + assert(ret <= bin->stack_head); + + return ret; +} + +/* * As the name implies. This is important since it's not correct to try to * batch fill a nonempty cache bin. */ @@ -359,13 +393,17 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { return n; } +JEMALLOC_ALWAYS_INLINE bool +cache_bin_full(cache_bin_t *bin) { + return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); +} + /* * Free an object into the given bin. Fails only if the bin is full. */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; - if (unlikely(low_bits == bin->low_bits_full)) { + if (unlikely(cache_bin_full(bin))) { return false; } @@ -377,7 +415,39 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } -/** +/* Returns false if failed to stash (i.e. bin is full). */ +JEMALLOC_ALWAYS_INLINE bool +cache_bin_stash(cache_bin_t *bin, void *ptr) { + if (cache_bin_full(bin)) { + return false; + } + + /* Stash at the full position, in the [full, head) range. */ + uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; + /* Wraparound handled as well. */ + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + *(void **)((uintptr_t)bin->stack_head - diff) = ptr; + + assert(!cache_bin_full(bin)); + bin->low_bits_full += sizeof(void *); + cache_bin_assert_earlier(bin, bin->low_bits_full, low_bits_head); + + return true; +} + +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_nstashed_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + void **full = cache_bin_full_position_get(bin, info); + + uint16_t nstashed = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + bin->low_bits_full) / sizeof(void *); + assert(nstashed <= ncached_max); + + return nstashed; +} + +/* * Filling and flushing are done in batch, on arrays of void *s. For filling, * the arrays go forward, and can be accessed with ordinary array arithmetic. * For flushing, we work from the end backwards, and so need to use special @@ -463,6 +533,27 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_low_water_adjust(bin); } +static inline void +cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, + cache_bin_info_t *info, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nstashed) { + assert(nstashed > 0); + assert(cache_bin_nstashed_get(bin, info) == nstashed); + + void **full = cache_bin_full_position_get(bin, info); + arr->ptr = full; + assert(*arr->ptr != NULL); +} + +static inline void +cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { + void **full = cache_bin_full_position_get(bin, info); + + /* Reset the bin local full position. 
*/ + bin->low_bits_full = (uint16_t)(uintptr_t)full; + assert(cache_bin_nstashed_get(bin, info) == 0); +} + /* * Initialize a cache_bin_info to represent up to the given number of items in * the cache_bins it is associated with. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index a4be549..0cb15d3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -415,6 +415,9 @@ /* Performs additional size checks when defined. */ #undef JEMALLOC_OPT_SIZE_CHECKS +/* Allows sampled junk and stash for checking use-after-free when defined. */ +#undef JEMALLOC_UAF_DETECTION + /* Darwin VM_MAKE_TAG support */ #undef JEMALLOC_HAVE_VM_MAKE_TAG diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index e8bfb03..fa1fabe 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -35,6 +35,9 @@ extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; +/* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */ +extern uintptr_t san_cache_bin_nonfast_mask; + /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index f5d83a6..5ce77d9 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -198,6 +198,14 @@ static const bool config_opt_size_checks = #endif ; +static const bool config_uaf_detection = +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + /* Whether or not the C++ extensions are enabled. */ static const bool config_enable_cxx = #ifdef JEMALLOC_ENABLE_CXX diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 70debf3..f97211a 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -10,9 +10,16 @@ #define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 #define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 +#define SAN_LG_UAF_ALIGN_DEFAULT (-1) +#define SAN_CACHE_BIN_NONFAST_MASK_DEFAULT (uintptr_t)(-1) + +static const uintptr_t uaf_detect_junk = (uintptr_t)0x5b5b5b5b5b5b5b5bULL; + /* 0 means disabled, i.e. never guarded. */ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; +/* -1 means disabled, i.e. never check for use-after-free. 
*/ +extern ssize_t opt_lg_san_uaf_align; void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool left, bool right, bool remap); @@ -24,7 +31,10 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize); + void tsd_san_init(tsd_t *tsd); +void san_init(ssize_t lg_san_uaf_align); static inline void san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -121,4 +131,62 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { } } +static inline void +san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, + void **last) { + size_t ptr_sz = sizeof(void *); + + *first = ptr; + + *mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1))); + assert(*first != *mid || usize == ptr_sz); + assert((uintptr_t)*first <= (uintptr_t)*mid); + + /* + * When usize > 32K, the gap between requested_size and usize might be + * greater than 4K -- this means the last write may access an + * likely-untouched page (default settings w/ 4K pages). However by + * default the tcache only goes up to the 32K size class, and is usually + * tuned lower instead of higher, which makes it less of a concern. + */ + *last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk)); + assert(*first != *last || usize == ptr_sz); + assert(*mid != *last || usize <= ptr_sz * 2); + assert((uintptr_t)*mid <= (uintptr_t)*last); +} + +static inline bool +san_junk_ptr_should_slow(void) { + /* + * The latter condition (pointer size greater than the min size class) + * is not expected -- fall back to the slow path for simplicity. 
+ */ + return config_debug || (LG_SIZEOF_PTR > SC_LG_TINY_MIN); +} + +static inline void +san_junk_ptr(void *ptr, size_t usize) { + if (san_junk_ptr_should_slow()) { + memset(ptr, (char)uaf_detect_junk, usize); + return; + } + + void *first, *mid, *last; + san_junk_ptr_locations(ptr, usize, &first, &mid, &last); + *(uintptr_t *)first = uaf_detect_junk; + *(uintptr_t *)mid = uaf_detect_junk; + *(uintptr_t *)last = uaf_detect_junk; +} + +static inline bool +san_uaf_detection_enabled(void) { + bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1); + if (config_uaf_detection && ret) { + assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 << + opt_lg_san_uaf_align) - 1); + } + + return ret; +} + #endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 95f3a68..a2ab710 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -34,23 +34,25 @@ extern cache_bin_info_t *tcache_bin_info; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin, + szind_t binind, bool is_small); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 926c852..2634f14 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/util.h" @@ -61,6 +62,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero); } + tcache_bin_flush_stashed(tsd, tcache, bin, 
binind, + /* is_small */ true); ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, binind, &tcache_hard_success); @@ -100,6 +103,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) { return NULL; } + tcache_bin_flush_stashed(tsd, tcache, bin, binind, + /* is_small */ false); ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero); if (ret == NULL) { @@ -126,6 +131,21 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; + /* + * Not marking the branch unlikely because this is past free_fastpath() + * (which handles the most common cases), i.e. at this point it's often + * uncommon cases. + */ + if (cache_bin_nonfast_aligned(ptr)) { + /* Junk unconditionally, even if bin is full. */ + san_junk_ptr(ptr, sz_index2size(binind)); + if (cache_bin_stash(bin, ptr)) { + return; + } + assert(cache_bin_full(bin)); + /* Bin full; fall through into the flush branch. */ + } + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { if (unlikely(tcache_small_bin_disabled(binind, bin))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); diff --git a/src/arena.c b/src/arena.c index 121832a..ed41d6d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,6 +157,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += cache_bin_ncached_get_remote(cache_bin, + &tcache_bin_info[i]) * sz_index2size(i) + + cache_bin_nstashed_get(cache_bin, &tcache_bin_info[i]) * sz_index2size(i); } } diff --git a/src/cache_bin.c b/src/cache_bin.c index b747082..b8d81ef 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/safety_check.h" void cache_bin_info_init(cache_bin_info_t *info, diff --git a/src/ctl.c b/src/ctl.c index 81ab147..78dc579 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -150,6 +150,7 @@ CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) +CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -472,6 +473,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -2201,6 +2203,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) +CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, + opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index d105dff..c8eef2d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1657,6 +1657,31 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + if (config_uaf_detection && + CONF_MATCH("lg_san_uaf_align")) { + ssize_t a; + CONF_VALUE_READ(ssize_t, a) + if (CONF_VALUE_READ_FAIL() || a < -1) { + CONF_ERROR("Invalid conf value", + k, 
klen, v, vlen); + } + if (a == -1) { + opt_lg_san_uaf_align = -1; + CONF_CONTINUE; + } + + /* clip if necessary */ + ssize_t max_allowed = (sizeof(size_t) << 3) - 1; + ssize_t min_allowed = LG_PAGE; + if (a > max_allowed) { + a = max_allowed; + } else if (a < min_allowed) { + a = min_allowed; + } + + opt_lg_san_uaf_align = a; + CONF_CONTINUE; + } CONF_HANDLE_SIZE_T(opt_san_guard_small, "san_guard_small", 0, SIZE_T_MAX, @@ -1760,6 +1785,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); + san_init(opt_lg_san_uaf_align); sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); @@ -2970,6 +2996,41 @@ free_default(void *ptr) { } } +JEMALLOC_ALWAYS_INLINE bool +free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { + /* + * free_fastpath do not handle two uncommon cases: 1) sampled profiled + * objects and 2) sampled junk & stash for use-after-free detection. + * Both have special alignments which are used to escape the fastpath. + * + * prof_sample is page-aligned, which covers the UAF check when both + * are enabled (the assertion below). Avoiding redundant checks since + * this is on the fastpath -- at most one runtime branch from this. + */ + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } + + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. */ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } + + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } + + return false; +} + /* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { @@ -2992,18 +3053,21 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab)) { + if (unlikely(err || !alloc_ctx.slab || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { return false; } assert(alloc_ctx.szind != SC_NSIZES); } else { /* - * Check for both sizes that are too large, and for sampled - * objects. Sampled objects are always page-aligned. The - * sampled object check will also check for null ptr. + * Check for both sizes that are too large, and for sampled / + * special aligned objects. The alignment check will also check + * for null ptr. */ if (unlikely(size > SC_LOOKUP_MAXCLASS || - (config_prof && prof_sample_aligned(ptr)))) { + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); diff --git a/src/san.c b/src/san.c index 15fdb7f..6e51291 100644 --- a/src/san.c +++ b/src/san.c @@ -10,6 +10,15 @@ size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; +/* Aligned (-1 is off) ptrs will be junked & stashed on dealloc. */ +ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; + +/* + * Initialized in san_init(). When disabled, the mask is set to (uintptr_t)-1 + * to always fail the nonfast_align check. 
+ */ +uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; + static inline void san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, uintptr_t *addr, size_t size, bool left, bool right) { @@ -141,8 +150,59 @@ san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, /* right */ true, /* remap */ false); } +static bool +san_stashed_corrupted(void *ptr, size_t size) { + if (san_junk_ptr_should_slow()) { + for (size_t i = 0; i < size; i++) { + if (((char *)ptr)[i] != (char)uaf_detect_junk) { + return true; + } + } + return false; + } + + void *first, *mid, *last; + san_junk_ptr_locations(ptr, size, &first, &mid, &last); + if (*(uintptr_t *)first != uaf_detect_junk || + *(uintptr_t *)mid != uaf_detect_junk || + *(uintptr_t *)last != uaf_detect_junk) { + return true; + } + + return false; +} + +void +san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize) { + /* + * Verify that the junked-filled & stashed pointers remain unchanged, to + * detect write-after-free. + */ + for (size_t n = 0; n < nstashed; n++) { + void *stashed = ptrs[n]; + assert(stashed != NULL); + assert(cache_bin_nonfast_aligned(stashed)); + if (unlikely(san_stashed_corrupted(stashed, usize))) { + safety_check_fail(": Write-after-free " + "detected on deallocated pointer %p (size %zu).\n", + stashed, usize); + } + } +} + void tsd_san_init(tsd_t *tsd) { *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; } + +void +san_init(ssize_t lg_san_uaf_align) { + assert(lg_san_uaf_align == -1 || lg_san_uaf_align >= LG_PAGE); + if (lg_san_uaf_align == -1) { + san_cache_bin_nonfast_mask = (uintptr_t)-1; + return; + } + + san_cache_bin_nonfast_mask = ((uintptr_t)1 << lg_san_uaf_align) - 1; +} diff --git a/src/tcache.c b/src/tcache.c index 5c3d5b1..74f0d83 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" /******************************************************************************/ @@ -179,6 +180,8 @@ tcache_event(tsd_t *tsd) { bool is_small = (szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); if (low_water > 0) { @@ -497,6 +500,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); @@ -525,6 +530,48 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); } +/* + * Flushing stashed happens when 1) tcache fill, 2) tcache flush, or 3) tcache + * GC event. This makes sure that the stashed items do not hold memory for too + * long, and new buffers can only be allocated when nothing is stashed. + * + * The downside is, the time between stash and flush may be relatively short, + * especially when the request rate is high. 
It lowers the chance of detecting + * write-after-free -- however that is a delayed detection anyway, and is less + * of a focus than the memory overhead. + */ +void +tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, bool is_small) { + cache_bin_info_t *info = &tcache_bin_info[binind]; + /* + * The two below are for assertion only. The content of original cached + * items remain unchanged -- the stashed items reside on the other end + * of the stack. Checking the stack head and ncached to verify. + */ + void *head_content = *cache_bin->stack_head; + cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin, + info); + + cache_bin_sz_t nstashed = cache_bin_nstashed_get(cache_bin, info); + assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); + if (nstashed == 0) { + return; + } + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); + cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs, + nstashed); + san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, + is_small); + cache_bin_finish_flush_stashed(cache_bin, info); + + assert(cache_bin_nstashed_get(cache_bin, info) == 0); + assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); + assert(head_content == *cache_bin->stack_head); +} + void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena) { diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 524ee21..9a41dac 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -26,6 +26,12 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { static inline void do_arena_destroy(unsigned arena_ind) { + /* + * For convenience, flush tcache in case there are cached items. + * However not assert success since the tcache may be disabled. 
+ */ + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 56e6901..2b093b4 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -82,27 +82,30 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, free(out); } -TEST_BEGIN(test_cache_bin) { - const int ncached_max = 100; - bool success; - void *ptr; - - cache_bin_t bin; - cache_bin_info_t info; - cache_bin_info_init(&info, ncached_max); - +static void +test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { size_t size; size_t alignment; - cache_bin_info_compute_alloc(&info, 1, &size, &alignment); + cache_bin_info_compute_alloc(info, 1, &size, &alignment); void *mem = mallocx(size, MALLOCX_ALIGN(alignment)); assert_ptr_not_null(mem, "Unexpected mallocx failure"); size_t cur_offset = 0; - cache_bin_preincrement(&info, 1, mem, &cur_offset); - cache_bin_init(&bin, &info, mem, &cur_offset); - cache_bin_postincrement(&info, 1, mem, &cur_offset); - + cache_bin_preincrement(info, 1, mem, &cur_offset); + cache_bin_init(bin, info, mem, &cur_offset); + cache_bin_postincrement(info, 1, mem, &cur_offset); assert_zu_eq(cur_offset, size, "Should use all requested memory"); +} + +TEST_BEGIN(test_cache_bin) { + const int ncached_max = 100; + bool success; + void *ptr; + + cache_bin_info_t info; + cache_bin_info_init(&info, ncached_max); + cache_bin_t bin; + test_bin_init(&bin, &info); /* Initialize to empty; should then have 0 elements. */ expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); @@ -258,7 +261,123 @@ TEST_BEGIN(test_cache_bin) { } TEST_END +static void +do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, cache_bin_sz_t nstash) { + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Bin not empty"); + expect_true(cache_bin_nstashed_get(bin, info) == 0, "Bin not empty"); + expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); + + bool ret; + /* Fill */ + for (cache_bin_sz_t i = 0; i < nfill; i++) { + ret = cache_bin_dalloc_easy(bin, &ptrs[i]); + expect_true(ret, "Unexpected fill failure"); + } + expect_true(cache_bin_ncached_get_local(bin, info) == nfill, + "Wrong cached count"); + + /* Stash */ + for (cache_bin_sz_t i = 0; i < nstash; i++) { + ret = cache_bin_stash(bin, &ptrs[i + nfill]); + expect_true(ret, "Unexpected stash failure"); + } + expect_true(cache_bin_nstashed_get(bin, info) == nstash, + "Wrong stashed count"); + + if (nfill + nstash == info->ncached_max) { + ret = cache_bin_dalloc_easy(bin, &ptrs[0]); + expect_false(ret, "Should not dalloc into a full bin"); + ret = cache_bin_stash(bin, &ptrs[0]); + expect_false(ret, "Should not stash into a full bin"); + } + + /* Alloc filled ones */ + for (cache_bin_sz_t i = 0; i < nfill; i++) { + void *ptr = cache_bin_alloc(bin, &ret); + expect_true(ret, "Unexpected alloc failure"); + /* Verify it's not from the stashed range. 
*/ + expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], + "Should not alloc stashed ptrs"); + } + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Wrong cached count"); + expect_true(cache_bin_nstashed_get(bin, info) == nstash, + "Wrong stashed count"); + + cache_bin_alloc(bin, &ret); + expect_false(ret, "Should not alloc stashed"); + + /* Clear stashed ones */ + cache_bin_finish_flush_stashed(bin, info); + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Wrong cached count"); + expect_true(cache_bin_nstashed_get(bin, info) == 0, + "Wrong stashed count"); + + cache_bin_alloc(bin, &ret); + expect_false(ret, "Should not alloc from empty bin"); +} + +TEST_BEGIN(test_cache_bin_stash) { + const int ncached_max = 100; + + cache_bin_t bin; + cache_bin_info_t info; + cache_bin_info_init(&info, ncached_max); + test_bin_init(&bin, &info); + + /* + * The content of this array is not accessed; instead the interior + * addresses are used to insert / stash into the bins as test pointers. + */ + void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + bool ret; + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_ncached_get_local(&bin, &info) == + (i / 2 + i % 2), "Wrong ncached value"); + expect_true(cache_bin_nstashed_get(&bin, &info) == i / 2, + "Wrong nstashed value"); + if (i % 2 == 0) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } else { + ret = cache_bin_stash(&bin, &ptrs[i]); + expect_true(ret, "Should be able to stash into a " + "non-full cache bin"); + } + } + ret = cache_bin_dalloc_easy(&bin, &ptrs[0]); + expect_false(ret, "Should not dalloc into a full cache bin"); + ret = cache_bin_stash(&bin, &ptrs[0]); + expect_false(ret, "Should not stash into a full cache bin"); + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + void *ptr = cache_bin_alloc(&bin, &ret); + if (i < ncached_max / 2) { + expect_true(ret, "Should be able to alloc"); + uintptr_t diff = ((uintptr_t)ptr - (uintptr_t)&ptrs[0]) + / sizeof(void *); + expect_true(diff % 2 == 0, "Should be able to alloc"); + } else { + expect_false(ret, "Should not alloc stashed"); + expect_true(cache_bin_nstashed_get(&bin, &info) == + ncached_max / 2, "Wrong nstashed value"); + } + } + + test_bin_init(&bin, &info); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, &info, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 2); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 4); +} +TEST_END + int main(void) { - return test(test_cache_bin); + return test(test_cache_bin, + test_cache_bin_stash); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 81a36c9..bd5ef9e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -323,6 +323,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); + TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); #undef TEST_MALLCTL_OPT } @@ -368,7 +369,7 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. 
*/ void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); - if (!opt_prof) { + if (!opt_prof && !san_uaf_detection_enabled()) { expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); } @@ -434,8 +435,10 @@ TEST_BEGIN(test_tcache) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); - expect_ptr_eq(ps[i], p0, - "Expected mallocx() to allocate cached region, i=%u", i); + if (!san_uaf_detection_enabled()) { + expect_ptr_eq(ps[i], p0, "Expected mallocx() to " + "allocate cached region, i=%u", i); + } } /* Verify that reallocation uses cached regions. */ @@ -444,8 +447,10 @@ TEST_BEGIN(test_tcache) { qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", i); - expect_ptr_eq(qs[i], q0, - "Expected rallocx() to allocate cached region, i=%u", i); + if (!san_uaf_detection_enabled()) { + expect_ptr_eq(qs[i], q0, "Expected rallocx() to " + "allocate cached region, i=%u", i); + } /* Avoid undefined behavior in case of test failure. */ if (qs[i] == NULL) { qs[i] = ps[i]; diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 4f207e0..7b4217d 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -152,6 +152,7 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 4480d73..278c4ad 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024" +export MALLOC_CONF="tcache_max:1024,lg_san_uaf_align:-1" diff --git a/test/unit/uaf.c b/test/unit/uaf.c new file mode 100644 index 0000000..30842a3 --- /dev/null +++ b/test/unit/uaf.c @@ -0,0 +1,225 @@ +#include "test/jemalloc_test.h" +#include "test/arena_util.h" + +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/safety_check.h" + +static size_t san_uaf_align; + +static bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +static void +test_write_after_free_pre(void) { + safety_check_set_abort(&fake_abort); + fake_abort_called = false; +} + +static void +test_write_after_free_post(void) { + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + expect_true(fake_abort_called, "Use-after-free check didn't fire."); + safety_check_set_abort(NULL); +} + +static bool +uaf_detection_enabled(void) { + if (!config_uaf_detection) { + return false; + } + + ssize_t lg_san_uaf_align; + size_t sz = sizeof(lg_san_uaf_align); + assert_d_eq(mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + if (lg_san_uaf_align < 0) { + return false; + } + assert_zd_ge(lg_san_uaf_align, LG_PAGE, "san_uaf_align out of range"); + san_uaf_align = (size_t)1 << lg_san_uaf_align; + + bool tcache_enabled; + sz = sizeof(tcache_enabled); + assert_d_eq(mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + if (!tcache_enabled) { + return false; + } + + return true; +} + +static void +test_use_after_free(size_t alloc_size, bool write_after_free) { + void *ptr = (void *)(uintptr_t)san_uaf_align; + assert_true(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + ptr = (void 
*)((uintptr_t)123 * (uintptr_t)san_uaf_align); + assert_true(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + ptr = (void *)((uintptr_t)san_uaf_align + 1); + assert_false(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + + /* + * Disable purging (-1) so that all dirty pages remain committed, to + * make use-after-free tolerable. + */ + unsigned arena_ind = do_arena_create(-1, -1); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + size_t n_max = san_uaf_align * 2; + void **items = mallocx(n_max * sizeof(void *), flags); + assert_ptr_not_null(items, "Unexpected mallocx failure"); + + bool found = false; + size_t iter = 0; + char magic = 's'; + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + while (!found) { + ptr = mallocx(alloc_size, flags); + assert_ptr_not_null(ptr, "Unexpected mallocx failure"); + + found = cache_bin_nonfast_aligned(ptr); + *(char *)ptr = magic; + items[iter] = ptr; + assert_zu_lt(iter++, n_max, "No aligned ptr found"); + } + + if (write_after_free) { + test_write_after_free_pre(); + } + bool junked = false; + while (iter-- != 0) { + char *volatile mem = items[iter]; + assert_c_eq(*mem, magic, "Unexpected memory content"); + free(mem); + if (*mem != magic) { + junked = true; + assert_c_eq(*mem, (char)uaf_detect_junk, + "Unexpected junk-filling bytes"); + if (write_after_free) { + *(char *)mem = magic + 1; + } + } + /* Flush tcache (including stashed). */ + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + } + expect_true(junked, "Aligned ptr not junked"); + if (write_after_free) { + test_write_after_free_post(); + } + + dallocx(items, flags); + do_arena_destroy(arena_ind); +} + +TEST_BEGIN(test_read_after_free) { + test_skip_if(!uaf_detection_enabled()); + + test_use_after_free(sizeof(void *), /* write_after_free */ false); + test_use_after_free(sizeof(void *) + 1, /* write_after_free */ false); + test_use_after_free(16, /* write_after_free */ false); + test_use_after_free(20, /* write_after_free */ false); + test_use_after_free(32, /* write_after_free */ false); + test_use_after_free(33, /* write_after_free */ false); + test_use_after_free(48, /* write_after_free */ false); + test_use_after_free(64, /* write_after_free */ false); + test_use_after_free(65, /* write_after_free */ false); + test_use_after_free(129, /* write_after_free */ false); + test_use_after_free(255, /* write_after_free */ false); + test_use_after_free(256, /* write_after_free */ false); +} +TEST_END + +TEST_BEGIN(test_write_after_free) { + test_skip_if(!uaf_detection_enabled()); + + test_use_after_free(sizeof(void *), /* write_after_free */ true); + test_use_after_free(sizeof(void *) + 1, /* write_after_free */ true); + test_use_after_free(16, /* write_after_free */ true); + test_use_after_free(20, /* write_after_free */ true); + test_use_after_free(32, /* write_after_free */ true); + test_use_after_free(33, /* write_after_free */ true); + test_use_after_free(48, /* write_after_free */ true); + test_use_after_free(64, /* write_after_free */ true); + test_use_after_free(65, /* write_after_free */ true); + test_use_after_free(129, /* write_after_free */ true); + test_use_after_free(255, /* write_after_free */ true); + test_use_after_free(256, /* write_after_free */ true); +} +TEST_END + +static bool +check_allocated_intact(void **allocated, size_t n_alloc) { + for (unsigned i = 0; i < n_alloc; i++) { + void *ptr = *(void **)allocated[i]; + bool found = false; + 
for (unsigned j = 0; j < n_alloc; j++) { + if (ptr == allocated[j]) { + found = true; + break; + } + } + if (!found) { + return false; + } + } + + return true; +} + +TEST_BEGIN(test_use_after_free_integration) { + test_skip_if(!uaf_detection_enabled()); + + unsigned arena_ind = do_arena_create(-1, -1); + int flags = MALLOCX_ARENA(arena_ind); + + size_t n_alloc = san_uaf_align * 2; + void **allocated = mallocx(n_alloc * sizeof(void *), flags); + assert_ptr_not_null(allocated, "Unexpected mallocx failure"); + + for (unsigned i = 0; i < n_alloc; i++) { + allocated[i] = mallocx(sizeof(void *) * 8, flags); + assert_ptr_not_null(allocated[i], "Unexpected mallocx failure"); + if (i > 0) { + /* Emulate a circular list. */ + *(void **)allocated[i] = allocated[i - 1]; + } + } + *(void **)allocated[0] = allocated[n_alloc - 1]; + expect_true(check_allocated_intact(allocated, n_alloc), + "Allocated data corrupted"); + + for (unsigned i = 0; i < n_alloc; i++) { + free(allocated[i]); + } + /* Read-after-free */ + expect_false(check_allocated_intact(allocated, n_alloc), + "Junk-filling not detected"); + + test_write_after_free_pre(); + for (unsigned i = 0; i < n_alloc; i++) { + allocated[i] = mallocx(sizeof(void *), flags); + assert_ptr_not_null(allocated[i], "Unexpected mallocx failure"); + *(void **)allocated[i] = (void *)(uintptr_t)i; + } + /* Write-after-free */ + for (unsigned i = 0; i < n_alloc; i++) { + free(allocated[i]); + *(void **)allocated[i] = NULL; + } + test_write_after_free_post(); +} +TEST_END + +int +main(void) { + return test( + test_read_after_free, + test_write_after_free, + test_use_after_free_integration); +} diff --git a/test/unit/uaf.sh b/test/unit/uaf.sh new file mode 100644 index 0000000..5f12dcf --- /dev/null +++ b/test/unit/uaf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="lg_san_uaf_align:12" -- cgit v0.12 From e491cef9abcc80de7c2648a0a244a5271848099a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Nov 2021 14:39:34 -0800 Subject: Add stats for stashed bytes in tcache. --- include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/cache_bin.h | 58 +++++++++++++++++++++++---------- src/arena.c | 15 +++++---- src/ctl.c | 7 ++++ src/stats.c | 3 +- src/tcache.c | 4 +-- test/unit/cache_bin.c | 13 ++++---- test/unit/stats.c | 14 ++++++-- test/unit/uaf.c | 33 +++++++++++++++++++ 9 files changed, 112 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 02c9340..15f1d34 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -73,6 +73,7 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ size_t tcache_bytes; /* Derived. */ + size_t tcache_stashed_bytes; /* Derived. */ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 266897f..76345be 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -224,18 +224,6 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { } /* - * Obtain a racy view of the number of items currently in the cache bin, in the - * presence of possible concurrent modifications. 
- */ -static inline cache_bin_sz_t -cache_bin_ncached_get_remote(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ true); - assert(n <= cache_bin_info_ncached_max(info)); - return n; -} - -/* * Internal. * * A pointer to the position one past the end of the backing array. @@ -436,15 +424,49 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, + bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); void **full = cache_bin_full_position_get(bin, info); - uint16_t nstashed = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, bin->low_bits_full) / sizeof(void *); - assert(nstashed <= ncached_max); + assert(n <= ncached_max); + + /* Below are for assertions only. */ + void *stashed = *(full + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); +#ifdef JEMALLOC_JET + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; +#endif + assert(n == 0 || (stashed != NULL && aligned) || racy); + + return n; +} - return nstashed; +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, false); + assert(n <= cache_bin_info_ncached_max(info)); + return n; +} + +/* + * Obtain a racy view of the number of items currently in the cache bin, in the + * presence of possible concurrent modifications. + */ +static inline void +cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true); + assert(n <= cache_bin_info_ncached_max(info)); + *ncached = n; + + n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true); + assert(n <= cache_bin_info_ncached_max(info)); + *nstashed = n; + /* Note that cannot assert ncached + nstashed <= ncached_max (racy). */ } /* @@ -538,7 +560,7 @@ cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nstashed) { assert(nstashed > 0); - assert(cache_bin_nstashed_get(bin, info) == nstashed); + assert(cache_bin_nstashed_get_local(bin, info) == nstashed); void **full = cache_bin_full_position_get(bin, info); arr->ptr = full; @@ -551,7 +573,7 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { /* Reset the bin local full position. */ bin->low_bits_full = (uint16_t)(uintptr_t)full; - assert(cache_bin_nstashed_get(bin, info) == 0); + assert(cache_bin_nstashed_get_local(bin, info) == 0); } /* diff --git a/src/arena.c b/src/arena.c index ed41d6d..bf880d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -148,18 +148,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); - /* tcache_bytes counts currently cached bytes. */ + /* Currently cached bytes and sanitizer-stashed bytes in tcache. 
*/ astats->tcache_bytes = 0; + astats->tcache_stashed_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; - astats->tcache_bytes += - cache_bin_ncached_get_remote(cache_bin, - &tcache_bin_info[i]) * sz_index2size(i) + - cache_bin_nstashed_get(cache_bin, - &tcache_bin_info[i]) * sz_index2size(i); + cache_bin_sz_t ncached, nstashed; + cache_bin_nitems_get_remote(cache_bin, + &tcache_bin_info[i], &ncached, &nstashed); + + astats->tcache_bytes += ncached * sz_index2size(i); + astats->tcache_stashed_bytes += nstashed * + sz_index2size(i); } } malloc_mutex_prof_read(tsdn, diff --git a/src/ctl.c b/src/ctl.c index 78dc579..5a92512 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -290,6 +290,7 @@ CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) +CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) CTL_PROTO(stats_arenas_i_hpa_sec_bytes) @@ -787,6 +788,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("internal"), CTL(stats_arenas_i_internal)}, {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, + {NAME("tcache_stashed_bytes"), + CTL(stats_arenas_i_tcache_stashed_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, @@ -1169,6 +1172,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.pa_shard_stats.pac_stats.abandoned_vm); sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; + sdstats->astats.tcache_stashed_bytes += + astats->astats.tcache_stashed_bytes; if (ctl_arena->arena_ind == 0) { sdstats->astats.uptime = astats->astats.uptime; @@ -3503,6 +3508,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_stashed_bytes, + arenas_i(mib[2])->astats->astats.tcache_stashed_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, arenas_i(mib[2])->astats->astats.resident, size_t) diff --git a/src/stats.c b/src/stats.c index b1b3906..bed585b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1055,7 +1055,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, large_nflushes; - size_t tcache_bytes, abandoned_vm; + size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); @@ -1344,6 +1344,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(internal) GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(tcache_stashed_bytes) GET_AND_EMIT_MEM_STAT(resident) GET_AND_EMIT_MEM_STAT(abandoned_vm) GET_AND_EMIT_MEM_STAT(extent_avail) diff --git a/src/tcache.c b/src/tcache.c index 74f0d83..45d4e81 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -553,7 +553,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_sz_t orig_cached = 
cache_bin_ncached_get_local(cache_bin, info); - cache_bin_sz_t nstashed = cache_bin_nstashed_get(cache_bin, info); + cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); if (nstashed == 0) { return; @@ -567,7 +567,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, is_small); cache_bin_finish_flush_stashed(cache_bin, info); - assert(cache_bin_nstashed_get(cache_bin, info) == 0); + assert(cache_bin_nstashed_get_local(cache_bin, info) == 0); assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); assert(head_content == *cache_bin->stack_head); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 2b093b4..3b6dbab 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -266,7 +266,8 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nstash) { expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Bin not empty"); - expect_true(cache_bin_nstashed_get(bin, info) == 0, "Bin not empty"); + expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + "Bin not empty"); expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); bool ret; @@ -283,7 +284,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, "Wrong stashed count"); if (nfill + nstash == info->ncached_max) { @@ -303,7 +304,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -313,7 +314,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_finish_flush_stashed(bin, info); expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin, info) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -338,7 +339,7 @@ TEST_BEGIN(test_cache_bin_stash) { for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_ncached_get_local(&bin, &info) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get(&bin, &info) == i / 2, + expect_true(cache_bin_nstashed_get_local(&bin, &info) == i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); @@ -361,7 +362,7 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get(&bin, &info) == + expect_true(cache_bin_nstashed_get_local(&bin, &info) == ncached_max / 2, "Wrong nstashed value"); } } diff --git a/test/unit/stats.c b/test/unit/stats.c index cb99b09..bbdbd18 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -367,7 +367,7 @@ TEST_END static void test_tcache_bytes_for_usize(size_t usize) { uint64_t epoch; - size_t tcache_bytes; + size_t tcache_bytes, tcache_stashed_bytes; size_t sz = sizeof(tcache_bytes); void *ptr = mallocx(usize, 0); @@ -377,7 +377,11 @@ test_tcache_bytes_for_usize(size_t 
usize) { assert_d_eq(mallctl( "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - size_t tcache_bytes_before = tcache_bytes; + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + size_t tcache_bytes_before = tcache_bytes + tcache_stashed_bytes; dallocx(ptr, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -385,7 +389,11 @@ test_tcache_bytes_for_usize(size_t usize) { assert_d_eq(mallctl( "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - size_t tcache_bytes_after = tcache_bytes; + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + size_t tcache_bytes_after = tcache_bytes + tcache_stashed_bytes; assert_zu_eq(tcache_bytes_after - tcache_bytes_before, usize, "Incorrectly attributed a free"); } diff --git a/test/unit/uaf.c b/test/unit/uaf.c index 30842a3..880aee4 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -53,6 +53,26 @@ uaf_detection_enabled(void) { return true; } +static size_t +read_tcache_stashed_bytes(unsigned arena_ind) { + if (!config_stats) { + return 0; + } + + uint64_t epoch; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + size_t tcache_stashed_bytes; + size_t sz = sizeof(tcache_stashed_bytes); + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + return tcache_stashed_bytes; +} + static void test_use_after_free(size_t alloc_size, bool write_after_free) { void *ptr = (void *)(uintptr_t)san_uaf_align; @@ -95,6 +115,7 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { while (iter-- != 0) { char *volatile mem = items[iter]; assert_c_eq(*mem, magic, "Unexpected memory content"); + size_t stashed_before = read_tcache_stashed_bytes(arena_ind); free(mem); if (*mem != magic) { junked = true; @@ -103,6 +124,18 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { if (write_after_free) { *(char *)mem = magic + 1; } + + size_t stashed_after = read_tcache_stashed_bytes( + arena_ind); + /* + * An edge case is the deallocation above triggering the + * tcache GC event, in which case the stashed pointers + * may get flushed immediately, before returning from + * free(). Treat these cases as checked already. + */ + if (stashed_after <= stashed_before) { + fake_abort_called = true; + } } /* Flush tcache (including stashed). */ assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), -- cgit v0.12 From 8b34a788b52c6410ef68f2dab6ebbf5079a0660e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Dec 2021 20:31:28 -0800 Subject: Fix an used-uninitialized warning (false positive). --- src/tcache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 45d4e81..7138f88 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -418,7 +418,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, /* Deallocate whatever we can. */ unsigned ndeferred = 0; - arena_dalloc_bin_locked_info_t dalloc_bin_info; + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; if (small) { arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } -- cgit v0.12 From 01d61a3c6fa4664ba92f97bd75f4b513396b140e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Dec 2021 22:05:13 -0800 Subject: Fix a conversion warning. --- include/jemalloc/internal/cache_bin.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 76345be..102c133 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -371,13 +371,15 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - size_t n = cache_bin_ncached_get_internal(bin, /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ false); if (n > num) { - n = num; + n = (cache_bin_sz_t)num; } memcpy(out, bin->stack_head, n * sizeof(void *)); bin->stack_head += n; cache_bin_low_water_adjust(bin); + return n; } -- cgit v0.12 From dfdd7562f55a409a1667a00595349804fe55cace Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Dec 2021 13:01:17 -0800 Subject: Rename san_enabled() to san_guard_enabled(). --- include/jemalloc/internal/san.h | 2 +- test/unit/hpa_background_thread.c | 4 ++-- test/unit/retained.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index f97211a..27adddb 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -76,7 +76,7 @@ san_one_side_guarded_sz(size_t size) { } static inline bool -san_enabled(void) { +san_guard_enabled(void) { return (opt_san_guard_large != 0 || opt_san_guard_small != 0); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 228b771..ad7bac4 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -129,7 +129,7 @@ TEST_BEGIN(test_hpa_background_thread_purges) { test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); /* Skip since guarded pages cannot be allocated from hpa. */ - test_skip_if(san_enabled()); + test_skip_if(san_guard_enabled()); unsigned arena_ind = create_arena(); /* @@ -145,7 +145,7 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); /* Skip since guarded pages cannot be allocated from hpa. */ - test_skip_if(san_enabled()); + test_skip_if(san_guard_enabled()); unsigned arena_ind = create_arena(); diff --git a/test/unit/retained.c b/test/unit/retained.c index 37ff88f..aa9f684 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -104,7 +104,7 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - size_t guard_sz = san_enabled() ? SAN_PAGE_GUARDS_SIZE : 0; + size_t guard_sz = san_guard_enabled() ? SAN_PAGE_GUARDS_SIZE : 0; esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); -- cgit v0.12 From eabe88916290fec452048eaa1abe1cd52a794339 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Dec 2021 13:38:12 -0800 Subject: Rename full_position to low_bound in cache_bin.h. 
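The layout comment introduced below relies on one invariant: a single cache bin's backing array never spans more than 2^16 bytes, so every position other than stack_head can be tracked by the low 16 bits of its address alone. A standalone sketch of that distance computation -- with made-up addresses and a hypothetical 4-slot bin, not code from this diff -- is:

#include <stdint.h>
#include <stdio.h>

int
main(void) {
	/* Pretend a bin caching at most 4 pointers starts at this address. */
	uintptr_t low_bound = (uintptr_t)0x10021020;          /* lowest slot */
	uintptr_t empty_pos = low_bound + 4 * sizeof(void *); /* one past the last slot */

	uint16_t low_bits_full = (uint16_t)low_bound;
	uint16_t low_bits_empty = (uint16_t)empty_pos;
	/* 16-bit subtraction suffices because the span is < 2^16 bytes. */
	uint16_t nbytes = (uint16_t)(low_bits_empty - low_bits_full);
	printf("%zu slots\n", nbytes / sizeof(void *));       /* prints "4 slots" */
	return 0;
}

The cache_bin_diff() calls seen elsewhere in this series perform the same style of 16-bit arithmetic on the bin's low_bits_* fields.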
--- include/jemalloc/internal/cache_bin.h | 36 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 102c133..c98c46a 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -66,6 +66,17 @@ struct cache_bin_info_s { /* * Responsible for caching allocations associated with a single size. + * + * Several pointers are used to track the stack. To save on metadata bytes, + * only the stack_head is a full sized pointer (which is dereferenced on the + * fastpath), while the others store only the low 16 bits -- this is correct + * because a single stack never takes more space than 2^16 bytes, and at the + * same time only equality checks are performed on the low bits. + * + * (low addr) (high addr) + * |------stashed------|------available------|------cached-----| + * ^ ^ ^ ^ + * low_bound(derived) low_bits_full stack_head low_bits_empty */ typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { @@ -94,11 +105,12 @@ struct cache_bin_s { /* * The low bits of the value that stack_head will take on when the array - * is full. (But remember that stack_head always points to a valid item - * when the array is nonempty -- this is in the array). + * is full (of cached & stashed items). But remember that stack_head + * always points to a valid item when the array is nonempty -- this is + * in the array. * - * Recall that since the stack grows down, this is the lowest address in - * the array. Only adjusted when stashing items. + * Recall that since the stack grows down, this is the lowest available + * address in the array for caching. Only adjusted when stashing items. */ uint16_t low_bits_full; @@ -246,7 +258,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * A pointer to the position with the lowest address of the backing array. */ static inline void ** -cache_bin_full_position_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -429,14 +441,14 @@ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - void **full = cache_bin_full_position_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin, info); - cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); /* Below are for assertions only. */ - void *stashed = *(full + n - 1); + void *stashed = *(low_bound + n - 1); bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET /* Allow arbitrary pointers to be stashed in tests. 
*/ @@ -564,17 +576,17 @@ cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, assert(nstashed > 0); assert(cache_bin_nstashed_get_local(bin, info) == nstashed); - void **full = cache_bin_full_position_get(bin, info); - arr->ptr = full; + void **low_bound = cache_bin_low_bound_get(bin, info); + arr->ptr = low_bound; assert(*arr->ptr != NULL); } static inline void cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { - void **full = cache_bin_full_position_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin, info); /* Reset the bin local full position. */ - bin->low_bits_full = (uint16_t)(uintptr_t)full; + bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; assert(cache_bin_nstashed_get_local(bin, info) == 0); } -- cgit v0.12 From d660683d3ddc2aaebf41a5662a6bc629be016e6d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 13:27:23 -0800 Subject: Fix test config of lg_san_uaf_align. The option may be configure-disabled, which resulted in the invalid options output from the tests. --- test/include/test/san.h | 8 ++++++++ test/unit/tcache_max.c | 3 +++ test/unit/tcache_max.sh | 2 +- test/unit/uaf.c | 6 +++++- test/unit/uaf.sh | 3 --- 5 files changed, 17 insertions(+), 5 deletions(-) delete mode 100644 test/unit/uaf.sh diff --git a/test/include/test/san.h b/test/include/test/san.h index 691dc50..da07865 100644 --- a/test/include/test/san.h +++ b/test/include/test/san.h @@ -1,3 +1,11 @@ +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) +# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" +# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" +#else +# define TEST_SAN_UAF_ALIGN_ENABLE "" +# define TEST_SAN_UAF_ALIGN_DISABLE "" +#endif + static inline bool extent_is_guarded(tsdn_t *tsdn, void *ptr) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 7b4217d..1f657c8 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -1,4 +1,7 @@ #include "test/jemalloc_test.h" +#include "test/san.h" + +const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; enum { alloc_option_start = 0, diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 278c4ad..4480d73 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024,lg_san_uaf_align:-1" +export MALLOC_CONF="tcache_max:1024" diff --git a/test/unit/uaf.c b/test/unit/uaf.c index 880aee4..a8433c2 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -1,9 +1,13 @@ #include "test/jemalloc_test.h" #include "test/arena_util.h" +#include "test/san.h" #include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/safety_check.h" +const char *malloc_conf = TEST_SAN_UAF_ALIGN_ENABLE; + static size_t san_uaf_align; static bool fake_abort_called; @@ -28,7 +32,7 @@ test_write_after_free_post(void) { static bool uaf_detection_enabled(void) { - if (!config_uaf_detection) { + if (!config_uaf_detection || !san_uaf_detection_enabled()) { return false; } diff --git a/test/unit/uaf.sh b/test/unit/uaf.sh deleted file mode 100644 index 5f12dcf..0000000 --- a/test/unit/uaf.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="lg_san_uaf_align:12" -- cgit v0.12 From 067c2da07456660113bbb7bf76f0648c3c993a83 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 14:23:44 -0800 Subject: Fix unnecessary returns in san_(un)guard_pages_two_sided. 
--- include/jemalloc/internal/san.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h
index 27adddb..8813d6b 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h
@@ -39,14 +39,13 @@ void san_init(ssize_t lg_san_uaf_align); static inline void san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool remap) { - return san_guard_pages(tsdn, ehooks, edata, emap, true, true, - remap); + san_guard_pages(tsdn, ehooks, edata, emap, true, true, remap); } static inline void san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - return san_unguard_pages(tsdn, ehooks, edata, emap, true, true); + san_unguard_pages(tsdn, ehooks, edata, emap, true, true); } static inline size_t -- cgit v0.12
From f509703af59348496abdb0cb446e8d3d04bc085d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 14:39:42 -0800 Subject: Fix two conversion warnings in tcache.
--- src/tcache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/tcache.c b/src/tcache.c index 7138f88..fa16732 100644 --- a/src/tcache.c +++ b/src/tcache.c
@@ -116,7 +116,7 @@ tcache_gc_item_delay_compute(szind_t szind) { if (item_delay >= delay_max) { item_delay = delay_max - 1; } - return item_delay; + return (uint8_t)item_delay; } static void @@ -134,7 +134,11 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, size_t nflush = low_water - (low_water >> 2); if (nflush < tcache_slow->bin_flush_delay_items[szind]) { - tcache_slow->bin_flush_delay_items[szind] -= nflush; + /* Workaround for a conversion warning. */ + uint8_t nflush_uint8 = (uint8_t)nflush; + assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == + sizeof(nflush_uint8)); + tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; return; } else { tcache_slow->bin_flush_delay_items[szind] -- cgit v0.12
From 18510020e75fd3f6a2c9e26057d9a188bee1fc21 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 27 Dec 2021 13:39:39 +0300 Subject: Fix symbol conflict with musl libc
`__libc` prefixed functions are used by musl libc as non-replaceable malloc stubs. Fix this conflict by checking if we are linking against glibc.
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/jemalloc.c b/src/jemalloc.c index c8eef2d..990855c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c
@@ -3239,7 +3239,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif -# ifdef CPU_COUNT +# ifdef __GLIBC__ /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the -- cgit v0.12
From c91e62dd375637e1d029af5385ce633a74f98712 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Wed, 5 Jan 2022 21:19:50 +0300 Subject: #include <features.h> as requested
--- src/jemalloc.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/jemalloc.c b/src/jemalloc.c index 990855c..fb43524 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c
@@ -3231,6 +3231,8 @@ je_valloc(size_t size) { * passed an extra argument for the caller return address, which will be * ignored.
*/ +#include <features.h> // defines __GLIBC__ if we are compiling against glibc + JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; -- cgit v0.12
From 61978bbe693c020ffa29dee17b81072ac52726e0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 6 Jan 2022 16:54:01 -0800 Subject: Purge all if the last thread migrated away from an arena.
--- src/jemalloc.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/src/jemalloc.c b/src/jemalloc.c index fb43524..2ffb9f0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c
@@ -472,6 +472,12 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); + + if (arena_nthreads_get(oldarena, false) == 0) { + /* Purge if the old arena has no associated threads anymore. */ + arena_decay(tsd_tsdn(tsd), oldarena, + /* is_background_thread */ false, /* all */ true); + } } static void -- cgit v0.12
From 6230cc88b6b3902902c58e4331ca6273e71b8e2e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 6 Jan 2022 17:46:55 -0800 Subject: Add background thread sleep retry in test/unit/hpa_background_thread
Under high concurrency / heavy test load (e.g. using run_tests.sh), the background thread may not get scheduled for a longer period of time. Retry 100 times max before bailing out.
--- test/unit/hpa_background_thread.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index ad7bac4..81c2561 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c
@@ -4,8 +4,8 @@ static void sleep_for_background_thread_interval() { /* - * The sleep interval set in our .sh file is 50ms. So it should - * definitely run if we sleep for for times that. + * The sleep interval set in our .sh file is 50ms. So it likely will + * run if we sleep for four times that. */ sleep_ns(200 * 1000 * 1000); } @@ -117,10 +117,18 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { } } expect_b_eq(expect_deferred, observed_dirty_page, ""); - if (expect_deferred) { + + /* + * Under high concurrency / heavy test load (e.g. using run_test.sh), + * the background thread may not get scheduled for a longer period of + * time. Retry 100 times max before bailing out. + */ + unsigned retry = 0; + while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && + expect_deferred && (retry++ < 100)) { sleep_for_background_thread_interval(); } - empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Should have seen a background purge"); } -- cgit v0.12
From 89fe8ee6bf7a23556350d883a310c0224a171879 Mon Sep 17 00:00:00 2001 From: Jonathan Swinney Date: Thu, 6 Jan 2022 17:09:34 +0000 Subject: Use the isb instruction instead of yield for spin locks on arm
isb introduces a small delay which is closer to the x86 pause instruction.
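For context, the CPU_SPINWAIT hint configured here is what busy-wait loops issue while spinning. A minimal, self-contained sketch of such a loop -- the wait_on_flag() helper and its body are illustrative only, not jemalloc code -- is:

#include <stdatomic.h>

void
wait_on_flag(atomic_int *flag) {
	while (atomic_load_explicit(flag, memory_order_acquire) == 0) {
#if defined(__aarch64__)
		__asm__ volatile("isb");	/* what this change selects */
#elif defined(__x86_64__) || defined(__i386__)
		__asm__ volatile("pause");	/* the x86 counterpart */
#endif
	}
}

On many ARM cores yield amounts to little more than a no-op, whereas isb stalls for a short, bounded time, which is much closer to the back-off that pause provides on x86.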
--- configure.ac | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 49a12ac..3303bad 100644 --- a/configure.ac +++ b/configure.ac @@ -425,14 +425,15 @@ case "${host_cpu}" in ;; aarch64|arm*) HAVE_CPU_SPINWAIT=1 - AC_CACHE_VAL([je_cv_yield], - [JE_COMPILABLE([yield instruction], [], - [[__asm__ volatile("yield"); return 0;]], - [je_cv_yield])]) - if test "x${je_cv_yield}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("yield")' + dnl isb is a better equivalent to the pause instruction on x86. + AC_CACHE_VAL([je_cv_isb], + [JE_COMPILABLE([isb instruction], [], + [[__asm__ volatile("isb"); return 0;]], + [je_cv_isb])]) + if test "x${je_cv_isb}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("isb")' fi - ;; + ;; *) HAVE_CPU_SPINWAIT=0 ;; -- cgit v0.12 From c9946fa7e679f9e9b739be83aff1b6a85cf8d78c Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Tue, 4 Jan 2022 17:29:31 -0800 Subject: FreeBSD also needs the OS-X "don't declare system functions as nothrow" fix since it also has jemalloc in the base system --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 5bb5c75..ebb3137 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW -- cgit v0.12 From d66162e032190d74a2071e93049751744975ce55 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Jan 2022 11:41:24 -0800 Subject: Fix the extent state checking on the merge error path. With DSS as primary, the default merge impl will (correctly) decline to merge when one of the extent is non-dss. The error path should tolerate the not-merged extent being in a merging state. --- src/extent.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 1c6fa1f..cf3d1f3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -245,11 +245,10 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, +extent_deactivate_locked_impl(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == extent_state_active); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); eset_t *eset = edata_guarded_get(edata) ? 
&ecache->guarded_eset : @@ -258,6 +257,20 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, } static void +extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { + assert(edata_state_get(edata) == extent_state_active); + extent_deactivate_locked_impl(tsdn, pac, ecache, edata); +} + +static void +extent_deactivate_check_state_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata, extent_state_t expected_state) { + assert(edata_state_get(edata) == expected_state); + extent_deactivate_locked_impl(tsdn, pac, ecache, edata); +} + +static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); @@ -796,7 +809,8 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, forward ? inner : outer, forward ? outer : inner, /* holding_core_locks */ true); if (err) { - extent_deactivate_locked(tsdn, pac, ecache, outer); + extent_deactivate_check_state_locked(tsdn, pac, ecache, outer, + extent_state_merging); } return err; -- cgit v0.12 From 648b3b9f768674934c2bbf260bdc75301a63a314 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Jan 2022 17:11:18 -0800 Subject: Lower the num_threads in the stress test of test/unit/prof_recent This takes a fair amount of resources. Under high concurrency it was causing resource exhaustion such as pthread_create and mmap failures. --- test/unit/prof_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index c23b01e..4fb3723 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -553,7 +553,7 @@ TEST_END #undef DUMP_ERROR #undef DUMP_OUT_SIZE -#define N_THREADS 16 +#define N_THREADS 8 #define N_PTRS 512 #define N_CTLS 8 #define N_ITERS 2048 -- cgit v0.12 From ddb170b1d92d90ecee9ce87545086da9b34839aa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 10 Jan 2022 13:34:07 -0800 Subject: Simplify arena_migrate() to take arena_t* instead of indices. This makes debugging slightly easier and avoids the confusion of "should we create new arenas" here. 
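Under the new signature a caller resolves both arena pointers up front, mirroring the arena_get() lookups that previously lived inside arena_migrate(). A hypothetical call site (illustrative only, and assuming jemalloc's internal headers) would look like:

static void
switch_arena(tsd_t *tsd, unsigned oldind, unsigned newind) {
	/* false: look up only; never create an arena as a side effect. */
	arena_t *oldarena = arena_get(tsd_tsdn(tsd), oldind, false);
	arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, false);
	if (oldarena == NULL || newarena == NULL) {
		return;	/* the caller decides whether to create arenas first */
	}
	arena_migrate(tsd, oldarena, newarena);
}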
--- include/jemalloc/internal/jemalloc_internal_externs.h | 2 +- include/jemalloc/internal/jemalloc_internal_inlines_b.h | 2 +- src/ctl.c | 2 +- src/jemalloc.c | 7 +++---- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fa1fabe..fc834c6 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -62,7 +62,7 @@ void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 35d71d0..152f8a0 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -16,7 +16,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(newarena != NULL); /* Set new arena/tcache associations. */ - arena_migrate(tsd, oldind, newind); + arena_migrate(tsd, oldarena, newarena); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); diff --git a/src/ctl.c b/src/ctl.c index 5a92512..6e0088f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2259,7 +2259,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } /* Set new arena/tcache associations. */ - arena_migrate(tsd, oldind, newind); + arena_migrate(tsd, oldarena, newarena); if (tcache_available(tsd)) { tcache_arena_reassociate(tsd_tsdn(tsd), tsd_tcache_slowp_get(tsd), tsd_tcachep_get(tsd), diff --git a/src/jemalloc.c b/src/jemalloc.c index 2ffb9f0..17a27ae 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -464,11 +464,10 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { } void -arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { - arena_t *oldarena, *newarena; +arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { + assert(oldarena != NULL); + assert(newarena != NULL); - oldarena = arena_get(tsd_tsdn(tsd), oldind, false); - newarena = arena_get(tsd_tsdn(tsd), newind, false); arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); -- cgit v0.12 From 8b49eb132eae6fd3de081addb06d967470bfa2aa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Jan 2022 15:02:44 -0800 Subject: Fix the HELP_STRING of --enable-doc. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3303bad..ac916c7 100644 --- a/configure.ac +++ b/configure.ac @@ -977,7 +977,7 @@ AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) dnl Enable documentation AC_ARG_ENABLE([doc], - [AS_HELP_STRING([--enable-documentation], [Build documentation])], + [AS_HELP_STRING([--enable-doc], [Build documentation])], if test "x$enable_doc" = "xno" ; then enable_doc="0" else -- cgit v0.12 From 011449f17bdddd4c9e0510b27a3fb34e88d072ca Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Jan 2022 14:54:33 -0800 Subject: Fix doc build with install-suffix. 
--- Makefile.in | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 7a820fe..80f3b95 100644 --- a/Makefile.in +++ b/Makefile.in @@ -177,7 +177,6 @@ else LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) endif PC := $(objroot)jemalloc.pc -MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) @@ -378,7 +377,7 @@ all: build_lib dist: build_doc -$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%$(install_suffix).html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< else @@ -388,9 +387,16 @@ endif @echo "Missing xsltproc. "$@" not (re)built." endif -$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%$(install_suffix).3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< +# The -o option (output filename) of xsltproc may not work (it uses the +# in the .xml file). Manually add the suffix if so. + ifneq ($(install_suffix),) + @if [ -f $(objroot)doc/jemalloc.3 ]; then \ + mv $(objroot)doc/jemalloc.3 $(objroot)doc/jemalloc$(install_suffix).3 ; \ + fi + endif else ifeq ($(wildcard $(DOCS_MAN3)),) @echo "Missing xsltproc. Doc not built." > $@ -- cgit v0.12 From eb196815d670f0937d2117ff0f2b885bd23c80de Mon Sep 17 00:00:00 2001 From: Charles Date: Mon, 17 Jan 2022 23:18:54 +0800 Subject: Avoid calculating size of size class twice & delete sc_data_global. --- src/sc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/sc.c b/src/sc.c index 37683ff..9a0f76d 100644 --- a/src/sc.c +++ b/src/sc.c @@ -13,8 +13,6 @@ * at least the damage is compartmentalized to this file. 
*/ -sc_data_t sc_data_global; - static size_t reg_size_compute(int lg_base, int lg_delta, int ndelta) { return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); @@ -64,9 +62,8 @@ size_class( sc->lg_base = lg_base; sc->lg_delta = lg_delta; sc->ndelta = ndelta; - sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta) - % (ZU(1) << lg_page) == 0); - size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); + size_t size = reg_size_compute(lg_base, lg_delta, ndelta); + sc->psz = (size % (ZU(1) << lg_page) == 0); if (index == 0) { assert(!sc->psz); } -- cgit v0.12 From f15d8f3b416f6812ac030bc1a7aacf05927a4d7f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Sat, 15 Jan 2022 13:51:33 -0800 Subject: Echo installed files via verbose 'install' command It's not necessary to manually echo all install commands, similar effect is achieved via 'install -v' --- Makefile.in | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Makefile.in b/Makefile.in index 80f3b95..f77ee7c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -554,20 +554,18 @@ endif install_bin: $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ - $(INSTALL) -m 755 $$b $(BINDIR); \ + $(INSTALL) -v -m 755 $$b $(BINDIR); \ done install_include: $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + $(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) $(INSTALL) -d $(LIBDIR) - $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif @@ -575,15 +573,13 @@ endif install_lib_static: $(STATIC_LIBS) $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ - $(INSTALL) -m 755 $$l $(LIBDIR); \ + $(INSTALL) -v -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ - $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ + $(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \ done ifeq ($(enable_shared), 1) @@ -597,15 +593,13 @@ install_lib: install_lib_pc install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + $(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ - $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ + $(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man -- cgit v0.12 From 640c3c72e661ec0b3f20865ee4fd4363644c017a Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 2 Jun 2021 12:50:46 +0800 Subject: Add support for 'make uninstall' --- Makefile.in | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/Makefile.in b/Makefile.in index f77ee7c..8e16982 100644 --- a/Makefile.in +++ b/Makefile.in @@ -610,6 +610,48 @@ ifeq ($(enable_doc), 1) install: install_doc endif +uninstall_bin: + $(RM) -v $(foreach b,$(notdir $(BINS)),$(BINDIR)/$(b)) + +uninstall_include: + $(RM) -v $(foreach h,$(notdir 
$(C_HDRS)),$(INCLUDEDIR)/jemalloc/$(h)) + rmdir -v $(INCLUDEDIR)/jemalloc + +uninstall_lib_shared: + $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SOREV) +ifneq ($(SOREV),$(SO)) + $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SO) +endif + +uninstall_lib_static: + $(RM) -v $(foreach l,$(notdir $(STATIC_LIBS)),$(LIBDIR)/$(l)) + +uninstall_lib_pc: + $(RM) -v $(foreach p,$(notdir $(PC)),$(LIBDIR)/pkgconfig/$(p)) + +ifeq ($(enable_shared), 1) +uninstall_lib: uninstall_lib_shared +endif +ifeq ($(enable_static), 1) +uninstall_lib: uninstall_lib_static +endif +uninstall_lib: uninstall_lib_pc + +uninstall_doc_html: + $(RM) -v $(foreach d,$(notdir $(DOCS_HTML)),$(DATADIR)/doc/jemalloc$(install_suffix)/$(d)) + rmdir -v $(DATADIR)/doc/jemalloc$(install_suffix) + +uninstall_doc_man: + $(RM) -v $(foreach d,$(notdir $(DOCS_MAN3)),$(MANDIR)/man3/$(d)) + +uninstall_doc: uninstall_doc_html uninstall_doc_man + +uninstall: uninstall_bin uninstall_include uninstall_lib + +ifeq ($(enable_doc), 1) +uninstall: uninstall_doc +endif + tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) -- cgit v0.12 From 36a09ba2c712612675f182fe879514a6078f5c77 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 18 Jan 2022 14:08:01 -0800 Subject: Forbid spaces in install suffix To avoid potential issues with removing unintended files after 'make uninstall', spaces are no longer allowed in install suffix. It's worth mentioning, that with GNU Make on Linux spaces in install suffix didn't work anyway, leading to errors in the Makefile. But being verbose about this restriction makes it more transparent for the developers. --- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ac916c7..6a5d082 100644 --- a/configure.ac +++ b/configure.ac @@ -1109,7 +1109,10 @@ AC_SUBST([private_namespace]) dnl Do not add suffix to installed files by default. AC_ARG_WITH([install_suffix], [AS_HELP_STRING([--with-install-suffix=], [Suffix to append to all installed files])], - [INSTALL_SUFFIX="$with_install_suffix"], + [case "$with_install_suffix" in + *\ * ) AC_MSG_ERROR([Install suffix should not contain spaces]) ;; + * ) INSTALL_SUFFIX="$with_install_suffix" ;; +esac], [INSTALL_SUFFIX=] ) install_suffix="$INSTALL_SUFFIX" -- cgit v0.12 From eafd2ac39fc4b608fc24b755670ff5138b9173ee Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 18 Jan 2022 17:20:57 -0800 Subject: Forbid spaces in prefix and exec_prefix Spaces in these are also not handled correctly by Make, so there's sense in not allowing that. --- configure.ac | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 6a5d082..0661005 100644 --- a/configure.ac +++ b/configure.ac @@ -131,12 +131,14 @@ abs_objroot="`pwd`/" AC_SUBST([abs_objroot]) dnl Munge install path variables. 
-if test "x$prefix" = "xNONE" ; then
-  prefix="/usr/local"
-fi
-if test "x$exec_prefix" = "xNONE" ; then
-  exec_prefix=$prefix
-fi
+case "$prefix" in
+  *\ * ) AC_MSG_ERROR([Prefix should not contain spaces]) ;;
+  "NONE" ) prefix="/usr/local" ;;
+esac
+case "$exec_prefix" in
+  *\ * ) AC_MSG_ERROR([Exec prefix should not contain spaces]) ;;
+  "NONE" ) exec_prefix=$prefix ;;
+esac
 PREFIX=$prefix
 AC_SUBST([PREFIX])
 BINDIR=`eval echo $bindir`
-- cgit v0.12


From b798fabdf7c86288f303b1e0bcf877c9ded67c18 Mon Sep 17 00:00:00 2001
From: yunxu
Date: Wed, 12 Jan 2022 18:46:34 +0800
Subject: Add prof_leak_error option

The option makes the process exit with error code 1 if a memory leak
is detected. This is useful for implementing automated tools that rely
on leak detection.
---
 doc/jemalloc.xml.in | 19 +++++++++++++++++++
 include/jemalloc/internal/prof_externs.h | 1 +
 src/ctl.c | 3 +++
 src/jemalloc.c | 20 ++++++++++++++++++++
 src/prof.c | 1 +
 src/prof_data.c | 10 ++++++++++
 src/stats.c | 1 +
 test/unit/mallctl.c | 1 +
 8 files changed, 56 insertions(+)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index cba0b3f..6e2099a 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1553,6 +1553,25 @@ malloc_conf = "xmalloc:true";]]>
+
+
+ opt.prof_leak_error
+ (bool)
+ r-
+ []
+
+ Similar to
+ opt.prof_leak, but makes the process exit with error
+ code 1 if a memory leak is detected. This option supersedes
+ opt.prof_leak,
+ meaning that if both are specified, this option takes precedence. When
+ enabled, also enables
+ opt.prof_leak. Works only when combined with
+ opt.prof_final,
+ otherwise does nothing. This option is disabled by default.
+
+
 opt.zero_realloc

diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index 953192f..bdff134 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -12,6 +12,7 @@ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
 extern bool opt_prof_gdump; /* High-water memory dumping. */
 extern bool opt_prof_final; /* Final profile dumping. */
 extern bool opt_prof_leak; /* Dump leak summary at exit. */
+extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
 extern bool opt_prof_accum; /* Report cumulative bytes. */
 extern bool opt_prof_log; /* Turn logging on at boot.
*/ extern char opt_prof_prefix[ diff --git a/src/ctl.c b/src/ctl.c index 6e0088f..54d33ae 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -145,6 +145,7 @@ CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_leak_error) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) @@ -469,6 +470,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_stats"), CTL(opt_prof_stats)}, @@ -2201,6 +2203,7 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak_error, opt_prof_leak_error, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 17a27ae..117a005 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1578,6 +1578,26 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + if (CONF_MATCH("prof_leak_error")) { + if (CONF_MATCH_VALUE("true")) { + if (!opt_prof_final) { + CONF_ERROR( + "prof_leak_error is" + " not allowed" + " without" + " prof_leak_final", + k, klen, v, vlen); + } else { + opt_prof_leak = true; + opt_prof_leak_error = + true; + } + } else if (!CONF_MATCH_VALUE("false")) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) diff --git a/src/prof.c b/src/prof.c index f708d10..cbfc740 100644 --- a/src/prof.c +++ b/src/prof.c @@ -31,6 +31,7 @@ ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; +bool opt_prof_leak_error = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; diff --git a/src/prof_data.c b/src/prof_data.c index 3ef0100..bfa55be 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1037,6 +1037,16 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on dump output for leak detail\n"); + if (opt_prof_leak_error) { + malloc_printf( + ": Exiting with error code because memory" + " leaks were detected\n"); + /* + * Use _exit() with underscore to avoid calling atexit() + * and entering endless cycle. 
+ */ + _exit(1); + } } #endif } diff --git a/src/stats.c b/src/stats.c index bed585b..efc70fd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1530,6 +1530,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_gdump") OPT_WRITE_BOOL("prof_final") OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("prof_leak_error") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") OPT_WRITE_BOOL("stats_print") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index bd5ef9e..6efc8f1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -320,6 +320,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); + TEST_MALLCTL_OPT(bool, prof_leak_error, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); -- cgit v0.12 From 01a293fc08ba8b6df1824ffecd10d2be5879b980 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 21 Dec 2021 16:15:14 -0800 Subject: Add Windows to TravisCI Implement the generation of Travis jobs for Windows. Currently, the generated jobs replicate Appveyor setup and complete successfully. There is support for MinGW GCC and MSVC compilers as well as 64 and 32 bit compilation. Linux and MacOS jobs behave identically, but some environment variables change - CROSS_COMPILE_32BIT=yes is added for builds with cross compilation, empty COMPILER_FLAGS are not set anymore. --- .travis.yml | 273 +++++++++++++++++++++----------------- scripts/gen_travis.py | 153 +++++++++++++-------- scripts/linux/before_install.sh | 13 ++ scripts/windows/before_install.sh | 83 ++++++++++++ scripts/windows/before_script.sh | 20 +++ scripts/windows/script.sh | 10 ++ 6 files changed, 376 insertions(+), 176 deletions(-) create mode 100644 scripts/linux/before_install.sh create mode 100644 scripts/windows/before_install.sh create mode 100644 scripts/windows/before_script.sh create mode 100644 scripts/windows/script.sh diff --git a/.travis.yml b/.travis.yml index ecc13f4..9744425 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,322 +1,333 @@ - # This config file is generated by ./scripts/gen_travis.py. # Do not edit by hand. -language: generic +# We use 'minimal', because 'generic' makes Windows VMs hang at startup. Also +# the software provided by 'generic' is simply not needed for our tests. 
+# Differences are explained here: +# https://docs.travis-ci.com/user/languages/minimal-and-generic/ +language: minimal dist: focal jobs: include: - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - addons: &gcc_multilib - apt: - packages: - - gcc-multilib - - g++-multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=clang 
CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof 
--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats 
--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -325,14 +336,30 @@ jobs: env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" +before_install: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_install.sh + fi + before_script: - - autoconf - - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - # If COMPILER_FLAGS are not empty, add them to CC and CXX - - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS - - make -j3 - - make -j3 tests + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then + source 
./scripts/$TRAVIS_OS_NAME/before_script.sh + else + scripts/gen_travis.py > travis_script && diff .travis.yml travis_script + autoconf + # If COMPILER_FLAGS are not empty, add them to CC and CXX + ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS + make -j3 + make -j3 tests + fi script: - - make check + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/script.sh + else + make check + fi diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index e98ebeb..63e0054 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -6,6 +6,7 @@ from enum import Enum, auto LINUX = 'linux' OSX = 'osx' +WINDOWS = 'windows' AMD64 = 'amd64' @@ -13,28 +14,48 @@ ARM64 = 'arm64' PPC64LE = 'ppc64le' -TRAVIS_TEMPLATE = """ +TRAVIS_TEMPLATE = """\ # This config file is generated by ./scripts/gen_travis.py. # Do not edit by hand. -language: generic +# We use 'minimal', because 'generic' makes Windows VMs hang at startup. Also +# the software provided by 'generic' is simply not needed for our tests. +# Differences are explained here: +# https://docs.travis-ci.com/user/languages/minimal-and-generic/ +language: minimal dist: focal jobs: include: {jobs} +before_install: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_install.sh + fi + before_script: - - autoconf - - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - # If COMPILER_FLAGS are not empty, add them to CC and CXX - - ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_script.sh + else + scripts/gen_travis.py > travis_script && diff .travis.yml travis_script + autoconf + # If COMPILER_FLAGS are not empty, add them to CC and CXX + ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ CXX="$CXX $COMPILER_FLAGS"}} $CONFIGURE_FLAGS - - make -j3 - - make -j3 tests + make -j3 + make -j3 tests + fi script: - - make check + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/script.sh + else + make check + fi """ @@ -44,6 +65,7 @@ class Option(object): COMPILER_FLAG = auto() CONFIGURE_FLAG = auto() MALLOC_CONF = auto() + FEATURE = auto() def __init__(self, type, value): self.type = type @@ -65,6 +87,10 @@ class Option(object): def as_malloc_conf(value): return Option(Option.Type.MALLOC_CONF, value) + @staticmethod + def as_feature(value): + return Option(Option.Type.FEATURE, value) + def __eq__(self, obj): return (isinstance(obj, Option) and obj.type == self.type and obj.value == self.value) @@ -81,13 +107,14 @@ MAX_UNUSUAL_OPTIONS = 2 GCC = Option.as_compiler('CC=gcc CXX=g++') CLANG = Option.as_compiler('CC=clang CXX=clang++') +CL = Option.as_compiler('CC=cl.exe CXX=cl.exe') -compiler_default = GCC compilers_unusual = [CLANG,] -compiler_flag_unusuals = [Option.as_compiler_flag(opt) for opt in ('-m32',)] +CROSS_COMPILE_32BIT = Option.as_feature('CROSS_COMPILE_32BIT') +feature_unusuals = [CROSS_COMPILE_32BIT] configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( @@ -108,73 +135,75 @@ malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( )] -all_unusuals = (compilers_unusual + compiler_flag_unusuals +all_unusuals = (compilers_unusual + feature_unusuals + configure_flag_unusuals + malloc_conf_unusuals) -gcc_multilib_set = False - - def get_extra_cflags(os, compiler): + if os == 
WINDOWS: + # For non-CL compilers under Windows (for now it's only MinGW-GCC), + # -fcommon needs to be specified to correctly handle multiple + # 'malloc_conf' symbols and such, which are declared weak under Linux. + # Weak symbols don't work with MinGW-GCC. + if compiler != CL.value: + return ['-fcommon'] + else: + return [] + # We get some spurious errors when -Warray-bounds is enabled. extra_cflags = ['-Werror', '-Wno-array-bounds'] if compiler == CLANG.value or os == OSX: extra_cflags += [ - '-Wno-unknown-warning-option', - '-Wno-ignored-attributes' - ] + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] if os == OSX: extra_cflags += [ - '-Wno-deprecated-declarations', - ] + '-Wno-deprecated-declarations', + ] return extra_cflags # Formats a job from a combination of flags def format_job(os, arch, combination): - global gcc_multilib_set - - compiler = [x.value for x in combination if x.type == Option.Type.COMPILER] - assert(len(compiler) <= 1) - if not compiler: - compiler = compiler_default.value - else: - compiler = compiler[0] + compilers = [x.value for x in combination if x.type == Option.Type.COMPILER] + assert(len(compilers) <= 1) compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] + features = [x.value for x in combination if x.type == Option.Type.FEATURE] if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) - job = "" - job += ' - os: {}\n'.format(os) - job += ' arch: {}\n'.format(arch) + if not compilers: + compiler = GCC.value + else: + compiler = compilers[0] - if '-m32' in compiler_flags and os == 'linux': - job += ' addons:' - if gcc_multilib_set: - job += ' *gcc_multilib\n' - else: - job += ' &gcc_multilib\n' - job += ' apt:\n' - job += ' packages:\n' - job += ' - gcc-multilib\n' - job += ' - g++-multilib\n' - gcc_multilib_set = True - - env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="{}"'.format( + extra_environment_vars = '' + cross_compile = CROSS_COMPILE_32BIT.value in features + if os == LINUX and cross_compile: + compiler_flags.append('-m32') + + features_str = ' '.join([' {}=yes'.format(feature) for feature in features]) + + stringify = lambda arr, name: ' {}="{}"'.format(name, ' '.join(arr)) if arr else '' + env_string = '{}{}{}{}{}{}'.format( compiler, - ' '.join(compiler_flags), - ' '.join(configure_flags), - ' '.join(get_extra_cflags(os, compiler)))) + features_str, + stringify(compiler_flags, 'COMPILER_FLAGS'), + stringify(configure_flags, 'CONFIGURE_FLAGS'), + stringify(get_extra_cflags(os, compiler), 'EXTRA_CFLAGS'), + extra_environment_vars) + job = ' - os: {}\n'.format(os) + job += ' arch: {}\n'.format(arch) job += ' env: {}'.format(env_string) return job -def generate_unusual_combinations(max_unusual_opts): +def generate_unusual_combinations(unusuals, max_unusual_opts): """ Generates different combinations of non-standard compilers, compiler flags, configure flags and malloc_conf settings. @@ -182,20 +211,22 @@ def generate_unusual_combinations(max_unusual_opts): @param max_unusual_opts: Limit of unusual options per combination. 
""" return chain.from_iterable( - [combinations(all_unusuals, i) for i in range(max_unusual_opts + 1)]) + [combinations(unusuals, i) for i in range(max_unusual_opts + 1)]) def included(combination, exclude): """ Checks if the combination of options should be included in the Travis testing matrix. + + @param exclude: A list of options to be avoided. """ return not any(excluded in combination for excluded in exclude) -def generate_jobs(os, arch, exclude, max_unusual_opts): +def generate_jobs(os, arch, exclude, max_unusual_opts, unusuals=all_unusuals): jobs = [] - for combination in generate_unusual_combinations(max_unusual_opts): + for combination in generate_unusual_combinations(unusuals, max_unusual_opts): if included(combination, exclude): jobs.append(format_job(os, arch, combination)) return '\n'.join(jobs) @@ -210,7 +241,7 @@ def generate_linux(arch): exclude = [] if arch == PPC64LE: # Avoid 32 bit builds and clang on PowerPC - exclude = [Option.as_compiler_flag('-m32')] + compilers_unusual + exclude = (CROSS_COMPILE_32BIT, CLANG,) return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -230,6 +261,19 @@ def generate_macos(arch): return generate_jobs(os, arch, exclude, max_unusual_opts) +def generate_windows(arch): + os = WINDOWS + + max_unusual_opts = 3 + unusuals = ( + Option.as_configure_flag('--enable-debug'), + CL, + CROSS_COMPILE_32BIT, + ) + return generate_jobs(os, arch, (), max_unusual_opts, unusuals) + + + def get_manual_jobs(): return """\ # Development build @@ -251,6 +295,9 @@ def main(): generate_linux(PPC64LE), generate_macos(AMD64), + + generate_windows(AMD64), + get_manual_jobs() )) diff --git a/scripts/linux/before_install.sh b/scripts/linux/before_install.sh new file mode 100644 index 0000000..6741746 --- /dev/null +++ b/scripts/linux/before_install.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -ev + +if [[ "$TRAVIS_OS_NAME" != "linux" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected linux, got $TRAVIS_OS_NAME" + exit 1 +fi + +if [[ "$CROSS_COMPILE_32BIT" == "yes" ]]; then + sudo apt-get update + sudo apt-get -y install gcc-multilib g++-multilib +fi diff --git a/scripts/windows/before_install.sh b/scripts/windows/before_install.sh new file mode 100644 index 0000000..2740c45 --- /dev/null +++ b/scripts/windows/before_install.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +set -e + +# The purpose of this script is to install build dependencies and set +# $build_env to a function that sets appropriate environment variables, +# to enable (mingw32|mingw64) environment if we want to compile with gcc, or +# (mingw32|mingw64) + vcvarsall.bat if we want to compile with cl.exe + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +[[ ! 
-f C:/tools/msys64/msys2_shell.cmd ]] && rm -rf C:/tools/msys64 +choco uninstall -y mingw +choco upgrade --no-progress -y msys2 + +msys_shell_cmd="cmd //C RefreshEnv.cmd && set MSYS=winsymlinks:nativestrict && C:\\tools\\msys64\\msys2_shell.cmd" + +msys2() { $msys_shell_cmd -defterm -no-start -msys2 -c "$*"; } +mingw32() { $msys_shell_cmd -defterm -no-start -mingw32 -c "$*"; } +mingw64() { $msys_shell_cmd -defterm -no-start -mingw64 -c "$*"; } + +if [[ "$CROSS_COMPILE_32BIT" == "yes" ]]; then + mingw=mingw32 + mingw_gcc_package_arch=i686 +else + mingw=mingw64 + mingw_gcc_package_arch=x86_64 +fi + +if [[ "$CC" == *"gcc"* ]]; then + $mingw pacman -S --noconfirm --needed \ + autotools \ + git \ + mingw-w64-${mingw_gcc_package_arch}-make \ + mingw-w64-${mingw_gcc_package_arch}-gcc \ + mingw-w64-${mingw_gcc_package_arch}-binutils + build_env=$mingw +elif [[ "$CC" == *"cl"* ]]; then + $mingw pacman -S --noconfirm --needed \ + autotools \ + git \ + mingw-w64-${mingw_gcc_package_arch}-make \ + mingw-w64-${mingw_gcc_package_arch}-binutils + + # In order to use MSVC compiler (cl.exe), we need to correctly set some environment + # variables, namely PATH, INCLUDE, LIB and LIBPATH. The correct values of these + # variables are set by a batch script "vcvarsall.bat". The code below generates + # a batch script that calls "vcvarsall.bat" and prints the environment variables. + # + # Then, those environment variables are transformed from cmd to bash format and put + # into a script $apply_vsenv. If cl.exe needs to be used from bash, one can + # 'source $apply_vsenv' and it will apply the environment variables needed for cl.exe + # to be located and function correctly. + # + # At last, a function "mingw_with_msvc_vars" is generated which forwards user input + # into a correct mingw (32 or 64) subshell that automatically performs 'source $apply_vsenv', + # making it possible for autotools to discover and use cl.exe. + vcvarsall="vcvarsall.tmp.bat" + echo "@echo off" > $vcvarsall + echo "call \"c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\\\vcvarsall.bat\" $USE_MSVC" >> $vcvarsall + echo "set" >> $vcvarsall + + apply_vsenv="./apply_vsenv.sh" + cmd //C $vcvarsall | grep -E "^PATH=" | sed -n -e 's/\(.*\)=\(.*\)/export \1=$PATH:"\2"/g' \ + -e 's/\([a-zA-Z]\):[\\\/]/\/\1\//g' \ + -e 's/\\/\//g' \ + -e 's/;\//:\//gp' > $apply_vsenv + cmd //C $vcvarsall | grep -E "^(INCLUDE|LIB|LIBPATH)=" | sed -n -e 's/\(.*\)=\(.*\)/export \1="\2"/gp' >> $apply_vsenv + + cat $apply_vsenv + mingw_with_msvc_vars() { $msys_shell_cmd -defterm -no-start -$mingw -c "source $apply_vsenv && ""$*"; } + build_env=mingw_with_msvc_vars + + rm -f $vcvarsall +else + echo "Unknown C compiler: $CC" + exit 1 +fi + +echo "Build environment function: $build_env" diff --git a/scripts/windows/before_script.sh b/scripts/windows/before_script.sh new file mode 100644 index 0000000..9d30aba --- /dev/null +++ b/scripts/windows/before_script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -e + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +$build_env autoconf +$build_env ./configure $CONFIGURE_FLAGS +# mingw32-make simply means "make", unrelated to mingw32 vs mingw64. +# Simply disregard the prefix and treat is as "make". +$build_env mingw32-make -j3 +# At the moment, it's impossible to make tests in parallel, +# seemingly due to concurrent writes to '.pdb' file. I don't know why +# that happens, because we explicitly supply '/Fs' to the compiler. 
+# Until we figure out how to fix it, we should build tests sequentially +# on Windows. +$build_env mingw32-make tests diff --git a/scripts/windows/script.sh b/scripts/windows/script.sh new file mode 100644 index 0000000..3a27f70 --- /dev/null +++ b/scripts/windows/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +$build_env mingw32-make -k check -- cgit v0.12 From 002f0e939795991f3f30fd0a6b0470094890305f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 10 Jan 2022 17:29:17 -0800 Subject: Disable TravisCI jobs generation for Windows These jobs take about 20 minutes to complete. We don't want to enable them until we switch to unlimited concurrency plan, otherwise the builds will take way too long. --- .travis.yml | 24 ------------------------ scripts/gen_travis.py | 2 +- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9744425..c54cc45 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,30 +304,6 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 63e0054..685bad5 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -296,7 +296,7 @@ def main(): generate_macos(AMD64), - generate_windows(AMD64), + #generate_windows(AMD64), get_manual_jobs() )) -- cgit v0.12 From efc539c040cf11b19ffc8af29a8cc3e5c3609092 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Jan 2022 17:56:12 -0800 Subject: Initialize prof_leak during prof init. Otherwise, prof_leak may get set after prof_leak_error, and disagree with each other. --- src/jemalloc.c | 2 +- src/prof.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 117a005..85c38dd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1585,7 +1585,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "prof_leak_error is" " not allowed" " without" - " prof_leak_final", + " prof_final", k, klen, v, vlen); } else { opt_prof_leak = true; diff --git a/src/prof.c b/src/prof.c index cbfc740..7a6d5d5 100644 --- a/src/prof.c +++ b/src/prof.c @@ -564,6 +564,9 @@ prof_boot1(void) { * opt_prof must be in its final state before any arenas are * initialized, so this function must be executed early. 
*/ + if (opt_prof_leak_error && !opt_prof_leak) { + opt_prof_leak = true; + } if (opt_prof_leak && !opt_prof) { /* -- cgit v0.12 From 8c59c44ffa83bab0f73d5cc8f7d0bbc8d649220b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 26 Jan 2022 14:05:04 -0800 Subject: Add a dependency checking step at the end of malloc_conf_init. Currently only prof_leak_error and prof_final are checked. --- src/jemalloc.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 85c38dd..364dc57 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1578,26 +1578,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - if (CONF_MATCH("prof_leak_error")) { - if (CONF_MATCH_VALUE("true")) { - if (!opt_prof_final) { - CONF_ERROR( - "prof_leak_error is" - " not allowed" - " without" - " prof_final", - k, klen, v, vlen); - } else { - opt_prof_leak = true; - opt_prof_leak_error = - true; - } - } else if (!CONF_MATCH_VALUE("false")) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } + CONF_HANDLE_BOOL(opt_prof_leak_error, + "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) @@ -1742,6 +1724,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } +static bool +malloc_conf_init_check_deps(void) { + if (opt_prof_leak_error && !opt_prof_final) { + malloc_printf(": prof_leak_error is set w/o " + "prof_final.\n"); + return true; + } + + return false; +} + static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, @@ -1752,6 +1745,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (malloc_conf_init_check_deps()) { + /* check_deps does warning msg only; abort below if needed. */ + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + } } #undef MALLOC_CONF_NSOURCES -- cgit v0.12 From 20f9802e4f25922884448d9581c66d76cc905c0c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 26 Jan 2022 18:40:49 -0800 Subject: Avoid overflow warnings in test/unit/safety_check. --- test/unit/safety_check.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c index 516a096..8472667 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -13,6 +13,13 @@ void fake_abort(const char *message) { fake_abort_called = true; } +static void +buffer_overflow_write(char *ptr, size_t size) { + /* Avoid overflow warnings. */ + volatile size_t idx = size; + ptr[idx] = 0; +} + TEST_BEGIN(test_malloc_free_overflow) { test_skip_if(!config_prof); test_skip_if(!config_opt_safety_checks); @@ -20,7 +27,7 @@ TEST_BEGIN(test_malloc_free_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); free(ptr); safety_check_set_abort(NULL); @@ -36,7 +43,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! 
*/ char* ptr = mallocx(128, 0); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); dallocx(ptr, 0); safety_check_set_abort(NULL);
@@ -52,7 +59,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); sdallocx(ptr, 128, 0); safety_check_set_abort(NULL);
@@ -68,7 +75,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); ptr = realloc(ptr, 129); safety_check_set_abort(NULL); free(ptr);
@@ -85,7 +92,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); ptr = rallocx(ptr, 129, 0); safety_check_set_abort(NULL); free(ptr);
@@ -102,7 +109,7 @@ TEST_BEGIN(test_xallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); size_t result = xallocx(ptr, 129, 0, 0); expect_zu_eq(result, 128, ""); free(ptr); -- cgit v0.12
From a4e81221cceeb887708d53015d3d1f1f9642980a Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 31 Jan 2022 12:28:15 -0800 Subject: Document 'make uninstall' Update INSTALL.md, reflecting the addition of the 'uninstall' target.
--- INSTALL.md | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/INSTALL.md b/INSTALL.md index 14dacfa..90da718 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -12,6 +12,10 @@ that might work is: make make install +You can uninstall the installed build artifacts like this: + + make uninstall + Notes: - "autoconf" needs to be installed - Documentation is built by the default target only when xsltproc is -- cgit v0.12
From 063d134aeb4807872f45a3b7e6b43bed8f6320a2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Feb 2022 17:30:11 -0800 Subject: Properly detect background thread support on Darwin. When cross-compiling, the host type / ABI should be checked to determine background thread compatibility.
--- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/configure.ac b/configure.ac index 0661005..abcd91d 100644 --- a/configure.ac +++ b/configure.ac @@ -2394,7 +2394,8 @@ fi dnl ============================================================================ dnl Enable background threads if possible. -if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then +if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ + "x${abi}" != "xmacho" ; then AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi -- cgit v0.12
From ca709c3139f77f4c00a903cdee46d71e9028f6c6 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 14 Feb 2022 17:57:14 -0800 Subject: Fix failed assertion due to racy memory access While calculating the number of stashed pointers, multiple variables potentially modified by a concurrent thread were used for the calculation. This led to some inconsistencies, correctly detected by the assertions. The change eliminates some possible inconsistencies by using unmodified variables and reading a concurrently modified one only once. The assertions are omitted for the cases where we acknowledge potential inconsistencies too.
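The pattern behind the fix can be condensed into a stand-alone sketch (illustrative only, not part of the patch; the struct and names below are simplified stand-ins for the cache_bin fields, and C11 atomics are used merely to make the single-read intent explicit, whereas the real code performs plain reads and tolerates the benign race): read the concurrently modified field once, and derive the count from that snapshot plus a field that never changes.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
	_Atomic uint16_t low_bits_full; /* updated by the owning thread */
	uint16_t low_bits_low_bound;    /* never changes after initialization */
} toy_bin_t;

/*
 * Racy reader: take one snapshot of the mutable field and combine it with an
 * immutable field, instead of mixing several concurrently changing values
 * (which is what tripped the assertions).
 */
static unsigned
toy_nstashed(toy_bin_t *bin) {
	uint16_t full = atomic_load_explicit(&bin->low_bits_full,
	    memory_order_relaxed);
	uint16_t diff = (uint16_t)(full - bin->low_bits_low_bound);
	return (unsigned)(diff / sizeof(void *));
}

int
main(void) {
	toy_bin_t bin = {3 * sizeof(void *), 0};
	printf("stashed = %u\n", toy_nstashed(&bin)); /* prints 3 */
	return 0;
}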
--- include/jemalloc/internal/cache_bin.h | 75 +++++++++++++++++++++++++++-------- src/cache_bin.c | 6 ++- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c98c46a..caf5be3 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -194,8 +194,15 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline uint16_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { - cache_bin_assert_earlier(bin, earlier, later); +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) { + /* + * When it's racy, bin->low_bits_full can be modified concurrently. It + * can cross the uint16_t max value and become less than + * bin->low_bits_empty at the time of the check. + */ + if (!racy) { + cache_bin_assert_earlier(bin, earlier, later); + } return later - earlier; } @@ -207,7 +214,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { static inline cache_bin_sz_t cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -239,11 +246,15 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * Internal. * * A pointer to the position one past the end of the backing array. + * + * Do not call if racy, because both 'bin->stack_head' and 'bin->low_bits_full' + * are subject to concurrent modifications. */ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, + /* racy */ false); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -255,6 +266,22 @@ cache_bin_empty_position_get(cache_bin_t *bin) { /* * Internal. * + * Calculates low bits of the lower bound of the usable cache bin's range (see + * cache_bin_t visual representation above). + * + * No values are concurrently modified, so should be safe to read in a + * multithreaded environment. Currently concurrent access happens only during + * arena statistics collection. + */ +static inline uint16_t +cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { + return (uint16_t)bin->low_bits_empty - + info->ncached_max * sizeof(void *); +} + +/* + * Internal. + * * A pointer to the position with the lowest address of the backing array. */ static inline void ** @@ -284,7 +311,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty) / sizeof(void *); + bin->low_bits_empty, /* racy */ false) / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. */ @@ -427,7 +454,8 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Stash at the full position, in the [full, head) range. */ uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. 
*/ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head, + /* racy */ false); *(void **)((uintptr_t)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin));
@@ -437,31 +465,46 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { return true; } +/* + * Get the number of stashed pointers. + * + * When called from a thread not owning the TLS (i.e. racy = true), it's + * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can + * be modified concurrently and almost none assertions about their values can be + * made. + */ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - void **low_bound = cache_bin_low_bound_get(bin, info); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, + info); - cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)low_bound, - bin->low_bits_full) / sizeof(void *); + cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, + bin->low_bits_full, racy) / sizeof(void *); assert(n <= ncached_max); - /* Below are for assertions only. */ - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + if (!racy) { + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); + + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned) || racy); + assert(n == 0 || (stashed != NULL && aligned)); + } return n; } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, false); + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, + /* racy */ false); assert(n <= cache_bin_info_ncached_max(info)); return n; } diff --git a/src/cache_bin.c b/src/cache_bin.c index b8d81ef..9ae072a 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -83,8 +83,10 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; - assert(cache_bin_diff(bin, bin->low_bits_full, - (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); + cache_bin_sz_t free_spots = cache_bin_diff(bin, + bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head, + /* racy */ false); + assert(free_spots == bin_stack_size); assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); -- cgit v0.12
From 78b58379c854a639df79beb3289351129d863d4b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 1 Mar 2022 18:31:30 +0300 Subject: Fix possible "nmalloc >= ndalloc" assertion. It is possible that ndalloc will be updated before nmalloc in arena_large_ralloc_stats_update(); fix this by reordering those calls. It was found by ClickHouse CI, which periodically hits this assertion [1].
[1]: https://github.com/ClickHouse/ClickHouse/issues/31531 That issue contains lots of examples, with core dumps and some gdb output [2].
[2]: https://s3.amazonaws.com/clickhouse-test-reports/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/stress_test__debug__actions_.html Here you can find binaries for that particular report [3]; you need the clickhouse debug build [4].
[3]: https://s3.amazonaws.com/clickhouse-builds/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/clickhouse_build_check_(actions)/report.html
[4]: https://s3.amazonaws.com/clickhouse-builds/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/package_debug/clickhouse
Brief info from that report: 2 0x000000002ad6dbfe in arena_stats_merge (tsdn=0x7f2399abdd20, arena=0x7f241ce01080, nthreads=0x7f24e4360958, dss=0x7f24e4360960, dirty_decay_ms=0x7f24e4360968, muzzy_decay_ms=0x7f24e4360970, nactive=0x7f24e4360978, ndirty=0x7f24e43 e4360988, astats=0x7f24e4360998, bstats=0x7f24e4363310, lstats=0x7f24e4364990, estats=0x7f24e4366e50, hpastats=0x7f24e43693a0, secstats=0x7f24e436a020) at ../contrib/jemalloc/src/arena.c:138 ndalloc = 226 nflush = 0 curlextents = 0 nmalloc = 225 nrequests = 0
Here you can see that they differ only by 1. Signed-off-by: Azat Khuzhin
--- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/arena.c b/src/arena.c index bf880d7..857b27c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -322,8 +322,8 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { static void arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) { - arena_large_dalloc_stats_update(tsdn, arena, oldusize); arena_large_malloc_stats_update(tsdn, arena, usize); + arena_large_dalloc_stats_update(tsdn, arena, oldusize); } edata_t * -- cgit v0.12
From eb65d1b07830b285bf7ac7678e964f080cd3916a Mon Sep 17 00:00:00 2001 From: Alex Lapenkov Date: Sat, 22 Jan 2022 10:14:16 -0800 Subject: Fix FreeBSD system jemalloc TSD cleanup Before this commit, in case the FreeBSD libc jemalloc was overridden by another jemalloc, the proper thread shutdown callback was invoked only for the overriding jemalloc. A call to _malloc_thread_cleanup from libthr would be redirected to the user jemalloc, leaving data about dead threads hanging in the system jemalloc. This change tackles the issue in two ways. First, for current and old system jemallocs, which we cannot modify, the overriding jemalloc will locate and invoke the system cleanup routine. For upcoming jemalloc integrations, the cleanup registering function will also be redirected to the user jemalloc, which means that the system jemalloc's cleanup routine will be registered in the user's jemalloc and a single call to _malloc_thread_cleanup will be sufficient to invoke both callbacks.
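A condensed sketch of the registration half of this scheme, boiled down from the tsd.c changes below (export attributes, assertions and Windows guards omitted; the demo main() is purely illustrative). With both the system jemalloc and the overriding jemalloc registering into the same table, a single _malloc_thread_cleanup call from libthr drives both, which is presumably also why MALLOC_TSD_CLEANUPS_MAX grows from 2 to 4 in this patch:

#include <stdbool.h>

#define CLEANUPS_MAX 4 /* room for two jemallocs' worth of callbacks */

typedef bool (*cleanup_fn_t)(void);

static cleanup_fn_t cleanups[CLEANUPS_MAX];
static unsigned ncleanups;

/* Each allocator registers its TSD cleanup here at boot time. */
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	if (ncleanups < CLEANUPS_MAX) {
		cleanups[ncleanups++] = f;
	}
}

/*
 * libthr calls this once per dying thread; every registered callback runs,
 * and a callback returning true is run again on the next pass.
 */
void
_malloc_thread_cleanup(void) {
	bool pending[CLEANUPS_MAX], again;
	for (unsigned i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}
	do {
		again = false;
		for (unsigned i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

/* Purely illustrative stand-in for the two allocators' cleanup hooks. */
static bool
demo_cleanup(void) {
	return false; /* nothing left to do */
}

int
main(void) {
	_malloc_tsd_cleanup_register(demo_cleanup);
	_malloc_tsd_cleanup_register(demo_cleanup);
	_malloc_thread_cleanup();
	return 0;
}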
--- configure.ac | 2 +- include/jemalloc/internal/tsd.h | 5 ++++- include/jemalloc/internal/tsd_malloc_thread_cleanup.h | 2 +- include/jemalloc/internal/tsd_types.h | 2 +- include/jemalloc/internal/tsd_win.h | 2 +- src/tsd.c | 16 +++++++++++----- 6 files changed, 19 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index abcd91d..69b8162 100644 --- a/configure.ac +++ b/configure.ac @@ -1999,7 +1999,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup], ) if test "x$have__malloc_thread_cleanup" = "x1" ; then AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ], [ ]) - wrap_syms="${wrap_syms} _malloc_thread_cleanup" + wrap_syms="${wrap_syms} _malloc_thread_cleanup _malloc_tsd_cleanup_register" force_tls="1" fi diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0a46d44..66d6882 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -153,9 +153,12 @@ typedef ql_elm(tsd_t) tsd_link_t; TSD_DATA_SLOWER_INITIALIZER \ } +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +void _malloc_tsd_cleanup_register(bool (*f)(void)); +#endif + void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); void tsd_cleanup(void *arg); diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index 65852d5..d8f3ef1 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -21,7 +21,7 @@ tsd_cleanup_wrapper(void) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { - malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_booted = true; return false; } diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 6200af6..a6ae37d 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#define MALLOC_TSD_CLEANUPS_MAX 2 +#define MALLOC_TSD_CLEANUPS_MAX 4 typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index cf30d18..a91dac8 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -72,7 +72,7 @@ tsd_boot0(void) { if (tsd_tsd == TLS_OUT_OF_INDEXES) { return true; } - malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); tsd_booted = true; return false; diff --git a/src/tsd.c b/src/tsd.c index 4859048..b98c34b 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -9,9 +9,6 @@ /******************************************************************************/ /* Data. 
*/ -static unsigned ncleanups; -static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; - /* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
@@ -337,6 +334,9 @@ malloc_tsd_dalloc(void *wrapper) { } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + #ifndef _WIN32 JEMALLOC_EXPORT #endif
@@ -361,15 +361,19 @@ _malloc_thread_cleanup(void) { } } while (again); } -#endif +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void -malloc_tsd_cleanup_register(bool (*f)(void)) { +_malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); cleanups[ncleanups] = f; ncleanups++; } +#endif + static void tsd_do_data_cleanup(tsd_t *tsd) { prof_tdata_cleanup(tsd);
@@ -429,7 +433,9 @@ tsd_t * malloc_tsd_boot0(void) { tsd_t *tsd; +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) ncleanups = 0; +#endif if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { return NULL; -- cgit v0.12
From 7ae0f15c598258610dd3cfd9633301ffa8661c45 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 21 Mar 2022 12:15:16 -0700 Subject: Add a default page size when cross-compiling for Apple M1. When cross-compiling for M1 with no page size specified, use the default 16K and skip detecting the page size (which would likely be incorrect).
--- configure.ac | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/configure.ac b/configure.ac index 69b8162..5c7a8ef 100644 --- a/configure.ac +++ b/configure.ac @@ -1677,6 +1677,15 @@ fi AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) +case "${host}" in + aarch64-apple-darwin*) + dnl When cross-compile for Apple M1 and no page size specified, use the + dnl default and skip detecting the page size (which is likely incorrect). + if test "x${host}" != "x${build}" -a "x$LG_PAGE" = "xdetect"; then + LG_PAGE=14 + fi + ;; +esac if test "x$LG_PAGE" = "xdetect"; then AC_CACHE_CHECK([LG_PAGE], [je_cv_lg_page], -- cgit v0.12
From 52631c90f664ded0a5106a7d5fd906d46a7c1f81 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 14 Mar 2022 20:17:14 -0700 Subject: Fix size class calculation for sec Due to a bug in sec initialization, the number of cached size classes was equal to 198. The bug caused the creation of more than a hundred unused bins, although it didn't affect the caching logic.
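The root cause is easiest to see with concrete numbers. Below is a stand-alone sketch, not part of the patch, assuming a 4 KiB page so that PAGE_MASK is 0xfff; masking with PAGE_MASK keeps only the sub-page bits (zero for any page-aligned value), while masking with ~PAGE_MASK rounds down to a page multiple, which is what the initialization actually needs (and what the PAGE_FLOOR macro introduced in the next patch spells out):

#include <stddef.h>
#include <stdio.h>

int
main(void) {
	size_t page_mask = 0xfff;     /* 4 KiB page */
	size_t max_alloc = 32 * 1024; /* the default sec max_alloc */

	/*
	 * Old code: keeps only the sub-page bits, which is 0 for any
	 * page-aligned value; feeding that into the size class math is what
	 * appears to have produced the 198 classes mentioned above.
	 */
	size_t kept_low_bits = max_alloc & page_mask;
	/* Fix: round down to the largest page multiple <= max_alloc. */
	size_t rounded_down = max_alloc & ~page_mask;

	printf("old: %zu, fixed: %zu\n", kept_low_bits, rounded_down);
	/* Prints "old: 0, fixed: 32768". */
	return 0;
}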
--- src/sec.c | 13 ++++++++----- test/unit/sec.c | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/sec.c b/src/sec.c index 0c4e703..6fffaf1 100644 --- a/src/sec.c +++ b/src/sec.c @@ -23,11 +23,11 @@ sec_bin_init(sec_bin_t *bin) { bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { - size_t max_alloc = opts->max_alloc & PAGE_MASK; - pszind_t npsizes = sz_psz2ind(max_alloc); - if (sz_pind2sz(npsizes) > opts->max_alloc) { - npsizes--; - } + assert(opts->max_alloc > 0); + + size_t max_alloc = opts->max_alloc & ~PAGE_MASK; + pszind_t npsizes = sz_psz2ind(max_alloc) + 1; + size_t sz_shards = opts->nshards * sizeof(sec_shard_t); size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); size_t sz_alloc = sz_shards + sz_bins; @@ -232,6 +232,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + sec_shard_t *shard = sec_shard_pick(tsdn, sec); sec_bin_t *bin = &shard->bins[pszind]; bool do_batch_fill = false; @@ -305,6 +307,7 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, assert(shard->bytes_cur <= sec->opts.max_bytes); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); /* * Prepending here results in LIFO allocation per bin, which seems * reasonable. diff --git a/test/unit/sec.c b/test/unit/sec.c index e98bdc9..f3ec403 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -46,6 +46,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); + assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching"); } static inline edata_t * -- cgit v0.12 From 5bf03f8ce5802b90a16b595e962fe4f07ce7fe93 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 22 Mar 2022 14:33:04 -0700 Subject: Implement PAGE_FLOOR macro --- include/jemalloc/internal/pages.h | 3 +++ src/sec.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 3d7993d..ad1f606 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -13,6 +13,9 @@ /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the largest pagesize multiple that is <=s. */ +#define PAGE_FLOOR(s) \ + ((s) & ~PAGE_MASK) /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) diff --git a/src/sec.c b/src/sec.c index 6fffaf1..c13904d 100644 --- a/src/sec.c +++ b/src/sec.c @@ -25,7 +25,7 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { assert(opts->max_alloc > 0); - size_t max_alloc = opts->max_alloc & ~PAGE_MASK; + size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; size_t sz_shards = opts->nshards * sizeof(sec_shard_t); -- cgit v0.12 From eaaa368bab472a78e99a25c1641d24ad3c2283ad Mon Sep 17 00:00:00 2001 From: Charles Date: Tue, 1 Feb 2022 20:26:39 +0800 Subject: Add comments and use meaningful vars in sz_psz2ind. 
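For readers following the new comments, here is a tiny worked example of the mapping they describe for the size classes before the first regular group (illustrative only; it assumes 4 KiB pages, matching the expectations in the test added below):

#include <stdio.h>

int
main(void) {
	unsigned page = 4096;
	/*
	 * For the page size classes before the first regular group, a size
	 * psz in (i * PAGE, (i + 1) * PAGE] maps to pszind i.
	 */
	unsigned sizes[] = {1, 4096, 4097, 8192, 12288, 16384};
	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned psz = sizes[i];
		unsigned ind = (psz + page - 1) / page - 1;
		printf("psz=%5u -> pszind=%u\n", psz, ind);
	}
	/* Prints indices 0, 0, 1, 1, 2, 3, matching test_sz_psz2ind below. */
	return 0;
}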
--- Makefile.in | 1 + include/jemalloc/internal/sc.h | 1 + include/jemalloc/internal/sz.h | 48 ++++++++++++++++++++++++------ src/sc.c | 2 +- test/unit/sz.c | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 test/unit/sz.c diff --git a/Makefile.in b/Makefile.in index 8e16982..cf6d568 100644 --- a/Makefile.in +++ b/Makefile.in @@ -278,6 +278,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/sz.c \ $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 8efd324..308985f 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -344,6 +344,7 @@ struct sc_data_s { sc_t sc[SC_NSIZES]; }; +size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index f2be613..3c0fc1d 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,22 +55,52 @@ extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { + assert(psz > 0); if (unlikely(psz > SC_LARGE_MAXCLASS)) { return SC_NPSIZES; } - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ? + /* x is the lg of the first base >= psz. */ + pszind_t x = lg_ceil(psz); + /* + * sc.h introduces a lot of size classes. These size classes are divided + * into different size class groups. There is a very special size class + * group, each size class in or after it is an integer multiple of PAGE. + * We call it first_ps_rg. It means first page size regular group. The + * range of first_ps_rg is (base, base * 2], and base == PAGE * + * SC_NGROUP. off_to_first_ps_rg begins from 1, instead of 0. e.g. + * off_to_first_ps_rg is 1 when psz is (PAGE * SC_NGROUP + 1). + */ + pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) ? 0 : x - (SC_LG_NGROUP + LG_PAGE); - pszind_t grp = shift << SC_LG_NGROUP; - pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? - LG_PAGE : x - SC_LG_NGROUP - 1; + /* + * Same as sc_s::lg_delta. + * Delta for off_to_first_ps_rg == 1 is PAGE, + * for each increase in offset, it's multiplied by two. + * Therefore, lg_delta = LG_PAGE + (off_to_first_ps_rg - 1). + */ + pszind_t lg_delta = (off_to_first_ps_rg == 0) ? + LG_PAGE : LG_PAGE + (off_to_first_ps_rg - 1); - size_t delta_inverse_mask = ZU(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << SC_LG_NGROUP) - 1); + /* + * Let's write psz in binary, e.g. 0011 for 0x3, 0111 for 0x7. + * The leftmost bits whose len is lg_base decide the base of psz. + * The rightmost bits whose len is lg_delta decide (pgz % PAGE). + * The middle bits whose len is SC_LG_NGROUP decide ndelta. + * ndelta is offset to the first size class in the size class group, + * starts from 1. + * If you don't know lg_base, ndelta or lg_delta, see sc.h. + * |xxxxxxxxxxxxxxxxxxxx|------------------------|yyyyyyyyyyyyyyyyyyyyy| + * |<-- len: lg_base -->|<-- len: SC_LG_NGROUP-->|<-- len: lg_delta -->| + * |<-- ndelta -->| + * rg_inner_off = ndelta - 1 + * Why use (psz - 1)? + * To handle case: psz % (1 << lg_delta) == 0. 
+ */ + pszind_t rg_inner_off = (((psz - 1)) >> lg_delta) & (SC_NGROUP - 1); - pszind_t ind = grp + mod; + pszind_t base_ind = off_to_first_ps_rg << SC_LG_NGROUP; + pszind_t ind = base_ind + rg_inner_off; return ind; } diff --git a/src/sc.c b/src/sc.c index 9a0f76d..e4a94d8 100644 --- a/src/sc.c +++ b/src/sc.c @@ -13,7 +13,7 @@ * at least the damage is compartmentalized to this file. */ -static size_t +size_t reg_size_compute(int lg_base, int lg_delta, int ndelta) { return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); } diff --git a/test/unit/sz.c b/test/unit/sz.c new file mode 100644 index 0000000..be11aca --- /dev/null +++ b/test/unit/sz.c @@ -0,0 +1,66 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_sz_psz2ind) { + /* + * Testing page size classes which reside prior to the regular group + * with all size classes divisible by page size. + * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with correponding + * pszind 0, 1, 2 and 3. + */ + for (size_t i = 0; i < SC_NGROUP; i++) { + for (size_t psz = i * PAGE + 1; psz <= (i + 1) * PAGE; psz++) { + pszind_t ind = sz_psz2ind(psz); + expect_zu_eq(ind, i, "Got %u as sz_psz2ind of %zu", ind, + psz); + } + } + + sc_data_t data; + memset(&data, 0, sizeof(data)); + sc_data_init(&data); + /* + * 'base' is the base of the first regular group with all size classes + * divisible by page size. + * For x86_64 Linux, it's 16384, and base_ind is 36. + */ + size_t base_psz = 1 << (SC_LG_NGROUP + LG_PAGE); + size_t base_ind = 0; + while (base_ind < SC_NSIZES && + reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, + data.sc[base_ind].ndelta) < base_psz) { + base_ind++; + } + expect_zu_eq( + reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), + base_psz, "Size class equal to %zu not found", base_psz); + /* + * Test different sizes falling into groups after the 'base'. The + * increment is PAGE / 3 for the execution speed purpose. + */ + base_ind -= SC_NGROUP; + for (size_t psz = base_psz; psz <= 64 * 1024 * 1024; psz += PAGE / 3) { + pszind_t ind = sz_psz2ind(psz); + sc_t gt_sc = data.sc[ind + base_ind]; + expect_zu_gt(psz, + reg_size_compute(gt_sc.lg_base, gt_sc.lg_delta, + gt_sc.ndelta), + "Got %u as sz_psz2ind of %zu", ind, psz); + sc_t le_sc = data.sc[ind + base_ind + 1]; + expect_zu_le(psz, + reg_size_compute(le_sc.lg_base, le_sc.lg_delta, + le_sc.ndelta), + "Got %u as sz_psz2ind of %zu", ind, psz); + } + + pszind_t max_ind = sz_psz2ind(SC_LARGE_MAXCLASS + 1); + expect_lu_eq(max_ind, SC_NPSIZES, + "Got %u as sz_psz2ind of %llu", max_ind, SC_LARGE_MAXCLASS); +} +TEST_END + +int +main(void) { + return test(test_sz_psz2ind); +} -- cgit v0.12 From a93931537e3845c8baca6965aded9a9683fa1481 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 24 Mar 2022 18:07:27 -0700 Subject: Do not disable SEC by default for 64k pages platforms Default SEC max_alloc option value was 32k, disabling SEC for platforms with lg-page=16. This change enables SEC for all platforms, making minimum max_alloc value equal to PAGE. --- include/jemalloc/internal/sec_opts.h | 2 +- src/sec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index 91b6d0d..a3ad72f 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -46,7 +46,7 @@ struct sec_opts_s { /* nshards */ \ 4, \ /* max_alloc */ \ - 32 * 1024, \ + (32 * 1024) < PAGE ? 
PAGE : (32 * 1024), \ /* max_bytes */ \ 256 * 1024, \ /* bytes_after_flush */ \ diff --git a/src/sec.c b/src/sec.c index c13904d..df67559 100644 --- a/src/sec.c +++ b/src/sec.c @@ -23,7 +23,7 @@ sec_bin_init(sec_bin_t *bin) { bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { - assert(opts->max_alloc > 0); + assert(opts->max_alloc >= PAGE); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; -- cgit v0.12 From fdb6c101625060236732a6003116a129edda3687 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 6 Jan 2022 19:31:09 -0800 Subject: Add FreeBSD to TravisCI Implement the generation of Travis jobs for FreeBSD. The generated jobs replicate the existing CirrusCI config. --- .travis.yml | 48 +++++++++++++++++++++++++++++++++++++++ scripts/freebsd/before_install.sh | 3 +++ scripts/freebsd/before_script.sh | 10 ++++++++ scripts/freebsd/script.sh | 3 +++ scripts/gen_travis.py | 19 ++++++++++++++++ 5 files changed, 83 insertions(+) create mode 100644 scripts/freebsd/before_install.sh create mode 100644 scripts/freebsd/before_script.sh create mode 100644 scripts/freebsd/script.sh diff --git a/.travis.yml b/.travis.yml index c54cc45..f2b107b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,6 +304,54 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof 
--enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/freebsd/before_install.sh b/scripts/freebsd/before_install.sh new file mode 100644 index 0000000..f2bee32 --- /dev/null +++ b/scripts/freebsd/before_install.sh @@ -0,0 +1,3 @@ +#!/bin/tcsh + +su -m root -c 'pkg install -y git' diff --git a/scripts/freebsd/before_script.sh b/scripts/freebsd/before_script.sh new file mode 100644 index 0000000..29406f6 --- /dev/null +++ b/scripts/freebsd/before_script.sh @@ -0,0 +1,10 @@ +#!/bin/tcsh + +autoconf +# We don't perfectly track freebsd stdlib.h definitions. This is fine when +# we count as a system header, but breaks otherwise, like during these +# tests. +./configure --with-jemalloc-prefix=ci_ ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS +JE_NCPUS=`sysctl -n kern.smp.cpus` +gmake -j${JE_NCPUS} +gmake -j${JE_NCPUS} tests diff --git a/scripts/freebsd/script.sh b/scripts/freebsd/script.sh new file mode 100644 index 0000000..d9c53a2 --- /dev/null +++ b/scripts/freebsd/script.sh @@ -0,0 +1,3 @@ +#!/bin/tcsh + +gmake check diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 685bad5..40b0be1 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -7,6 +7,7 @@ from enum import Enum, auto LINUX = 'linux' OSX = 'osx' WINDOWS = 'windows' +FREEBSD = 'freebsd' AMD64 = 'amd64' @@ -140,6 +141,9 @@ all_unusuals = (compilers_unusual + feature_unusuals def get_extra_cflags(os, compiler): + if os == FREEBSD: + return [] + if os == WINDOWS: # For non-CL compilers under Windows (for now it's only MinGW-GCC), # -fcommon needs to be specified to correctly handle multiple @@ -273,6 +277,19 @@ def generate_windows(arch): return generate_jobs(os, arch, (), max_unusual_opts, unusuals) +def generate_freebsd(arch): + os = FREEBSD + + max_unusual_opts = 4 + unusuals = ( + Option.as_configure_flag('--enable-debug'), + Option.as_configure_flag('--enable-prof --enable-prof-libunwind'), + Option.as_configure_flag('--with-lg-page=16 --with-malloc-conf=tcache:false'), + CROSS_COMPILE_32BIT, + ) + return generate_jobs(os, arch, (), max_unusual_opts, unusuals) + + def get_manual_jobs(): return """\ @@ -298,6 +315,8 @@ def main(): #generate_windows(AMD64), + generate_freebsd(AMD64), + get_manual_jobs() )) -- cgit v0.12 From 8a49b62e788a5ae21a32a3a2caccf27b841c9bf8 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 21 Mar 2022 14:14:34 -0700 Subject: Enable TravisCI for Windows --- .travis.yml | 24 ++++++++++++++++++++++++ scripts/gen_travis.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f2b107b..29c19a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,6 +304,30 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe 
CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: freebsd arch: amd64 env: CC=gcc CXX=g++ diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 40b0be1..e076c35 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -313,7 +313,7 @@ def main(): generate_macos(AMD64), - #generate_windows(AMD64), + generate_windows(AMD64), generate_freebsd(AMD64), -- cgit v0.12 From 25517b852e76b429d4a97f4c96606263b2a9c209 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 21 Mar 2022 15:11:34 -0700 Subject: Reoreder TravisCI jobs to optimize CI time Sorting jobs by descending expected runtime helps to utilize concurrency better. --- .travis.yml | 144 +++++++++++++++++++++++++------------------------- scripts/gen_travis.py | 10 ++-- 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/.travis.yml b/.travis.yml index 29c19a7..bf44fad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,78 @@ dist: focal jobs: include: + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes 
CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -304,78 +376,6 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind 
--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index e076c35..4366a06 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -308,16 +308,16 @@ EXTRA_CFLAGS="-Werror -Wno-array-bounds" def main(): jobs = '\n'.join(( + generate_windows(AMD64), + + generate_freebsd(AMD64), + generate_linux(AMD64), generate_linux(PPC64LE), generate_macos(AMD64), - generate_windows(AMD64), - - generate_freebsd(AMD64), - - get_manual_jobs() + get_manual_jobs(), )) print(TRAVIS_TEMPLATE.format(jobs=jobs)) -- cgit v0.12 From ed5fc14b28ca62a6ba57b65adf557e1ef09037f0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 23 Mar 2022 16:31:40 -0700 Subject: Use volatile to workaround buffer overflow false positives. In test/integration/rallocx, full usable size is checked which may confuse overflow detection. --- test/integration/rallocx.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index d4a48fc..68b8f38 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -41,7 +41,11 @@ get_large_size(size_t ind) { } TEST_BEGIN(test_grow_and_shrink) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + void *volatile p, *volatile q; size_t tsz; #define NCYCLES 3 unsigned i, j; @@ -85,9 +89,13 @@ TEST_BEGIN(test_grow_and_shrink) { TEST_END static bool -validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { +validate_fill(void *p, uint8_t c, size_t offset, size_t len) { bool ret = false; - const uint8_t *buf = (const uint8_t *)p; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + uint8_t *volatile buf = (uint8_t *)p; size_t i; for (i = 0; i < len; i++) { @@ -104,7 +112,11 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { } TEST_BEGIN(test_zero) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + void *volatile p, *volatile q; size_t psz, qsz, i, j; size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; #define FILL_BYTE 0xaaU @@ -205,7 +217,11 @@ TEST_BEGIN(test_align_enum) { TEST_END TEST_BEGIN(test_lg_align_and_zero) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + void *volatile p, *volatile q; unsigned lg_align; size_t sz; #define MAX_LG_ALIGN 25 -- cgit v0.12 From 5841b6dbe7106cf40923593ba8a0e6421a5fe905 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Apr 2022 14:43:26 -0700 Subject: Update FreeBSD image to 12.3 for cirrus ci. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 4cca64b..7569539 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -29,7 +29,7 @@ task: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: - image: freebsd-12-2-release-amd64 + image: freebsd-12-3-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y -- cgit v0.12 From 0e29ad4efa3d1c5ae9cd01afd32812dd18875200 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Apr 2022 12:17:59 -0700 Subject: Rename zero_realloc option "strict" to "alloc". With realloc(ptr, 0) being UB per C23, the option name "strict" makes less sense now. Rename to "alloc" which describes the behavior. --- Makefile.in | 2 +- doc/jemalloc.xml.in | 6 +-- .../jemalloc/internal/jemalloc_internal_types.h | 2 +- src/jemalloc.c | 12 +++--- test/unit/zero_realloc_alloc.c | 48 ++++++++++++++++++++++ test/unit/zero_realloc_alloc.sh | 3 ++ test/unit/zero_realloc_strict.c | 48 ---------------------- test/unit/zero_realloc_strict.sh | 3 -- 8 files changed, 62 insertions(+), 62 deletions(-) create mode 100644 test/unit/zero_realloc_alloc.c create mode 100644 test/unit/zero_realloc_alloc.sh delete mode 100644 test/unit/zero_realloc_strict.c delete mode 100644 test/unit/zero_realloc_strict.sh diff --git a/Makefile.in b/Makefile.in index cf6d568..1193cd8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,7 +289,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ $(srcroot)test/unit/zero_realloc_free.c \ - $(srcroot)test/unit/zero_realloc_strict.c \ + $(srcroot)test/unit/zero_realloc_alloc.c \ $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6e2099a..8c3703b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1580,19 +1580,19 @@ malloc_conf = "xmalloc:true";]]> Determines the behavior of realloc() when passed a value of zero for the new - size. strict treats this as an allocation of size zero + size. alloc treats this as an allocation of size zero (and returns a non-null result except in case of resource exhaustion). free treats this as a deallocation of the pointer, and returns NULL without setting errno. abort aborts the process if - zero is passed. The default is strict. + zero is passed. The default is alloc. There is considerable divergence of behaviors across implementations in handling this case. Many have the behavior of free. This can introduce security vulnerabilities, since a NULL return value indicates failure, and the continued validity of the passed-in pointer (per POSIX and C11). - strict is safe, but can cause leaks in programs that + alloc is safe, but can cause leaks in programs that expect the common behavior. Programs intended to be portable and leak-free cannot assume either behavior, and must therefore never call realloc with a size of 0. The abort option enables these diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 61c1f31..62c2b59 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -9,7 +9,7 @@ typedef int malloc_cpuid_t; /* When realloc(non-null-ptr, 0) is called, what happens? 
*/ enum zero_realloc_action_e { /* Realloc(ptr, 0) is free(ptr); return malloc(0); */ - zero_realloc_action_strict = 0, + zero_realloc_action_alloc = 0, /* Realloc(ptr, 0) is free(ptr); */ zero_realloc_action_free = 1, /* Realloc(ptr, 0) aborts. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 364dc57..7e5bd33 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -112,12 +112,12 @@ bool opt_cache_oblivious = ; zero_realloc_action_t opt_zero_realloc_action = - zero_realloc_action_strict; + zero_realloc_action_alloc; atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); const char *zero_realloc_mode_names[] = { - "strict", + "alloc", "free", "abort", }; @@ -1649,9 +1649,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } if (CONF_MATCH("zero_realloc")) { - if (CONF_MATCH_VALUE("strict")) { + if (CONF_MATCH_VALUE("alloc")) { opt_zero_realloc_action - = zero_realloc_action_strict; + = zero_realloc_action_alloc; } else if (CONF_MATCH_VALUE("free")) { opt_zero_realloc_action = zero_realloc_action_free; @@ -3578,9 +3578,9 @@ do_realloc_nonnull_zero(void *ptr) { if (config_stats) { atomic_fetch_add_zu(&zero_realloc_count, 1, ATOMIC_RELAXED); } - if (opt_zero_realloc_action == zero_realloc_action_strict) { + if (opt_zero_realloc_action == zero_realloc_action_alloc) { /* - * The user might have gotten a strict setting while expecting a + * The user might have gotten an alloc setting while expecting a * free setting. If that's the case, we at least try to * reduce the harm, and turn off the tcache while allocating, so * that we'll get a true first fit. diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c new file mode 100644 index 0000000..65e07bd --- /dev/null +++ b/test/unit/zero_realloc_alloc.c @@ -0,0 +1,48 @@ +#include "test/jemalloc_test.h" + +static uint64_t +allocated() { + if (!config_stats) { + return 0; + } + uint64_t allocated; + size_t sz = sizeof(allocated); + expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + return allocated; +} + +static uint64_t +deallocated() { + if (!config_stats) { + return 0; + } + uint64_t deallocated; + size_t sz = sizeof(deallocated); + expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return deallocated; +} + +TEST_BEGIN(test_realloc_alloc) { + void *ptr = mallocx(1, 0); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); + uint64_t allocated_before = allocated(); + uint64_t deallocated_before = deallocated(); + ptr = realloc(ptr, 0); + uint64_t allocated_after = allocated(); + uint64_t deallocated_after = deallocated(); + if (config_stats) { + expect_u64_lt(allocated_before, allocated_after, + "Unexpected stats change"); + expect_u64_lt(deallocated_before, deallocated_after, + "Unexpected stats change"); + } + dallocx(ptr, 0); +} +TEST_END +int +main(void) { + return test( + test_realloc_alloc); +} diff --git a/test/unit/zero_realloc_alloc.sh b/test/unit/zero_realloc_alloc.sh new file mode 100644 index 0000000..802687c --- /dev/null +++ b/test/unit/zero_realloc_alloc.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:alloc" diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_strict.c deleted file mode 100644 index 249d838..0000000 --- a/test/unit/zero_realloc_strict.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "test/jemalloc_test.h" - -static uint64_t -allocated() { - if (!config_stats) { - return 0; - } - uint64_t allocated; 
- size_t sz = sizeof(allocated); - expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - return allocated; -} - -static uint64_t -deallocated() { - if (!config_stats) { - return 0; - } - uint64_t deallocated; - size_t sz = sizeof(deallocated); - expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - return deallocated; -} - -TEST_BEGIN(test_realloc_strict) { - void *ptr = mallocx(1, 0); - expect_ptr_not_null(ptr, "Unexpected mallocx error"); - uint64_t allocated_before = allocated(); - uint64_t deallocated_before = deallocated(); - ptr = realloc(ptr, 0); - uint64_t allocated_after = allocated(); - uint64_t deallocated_after = deallocated(); - if (config_stats) { - expect_u64_lt(allocated_before, allocated_after, - "Unexpected stats change"); - expect_u64_lt(deallocated_before, deallocated_after, - "Unexpected stats change"); - } - dallocx(ptr, 0); -} -TEST_END -int -main(void) { - return test( - test_realloc_strict); -} diff --git a/test/unit/zero_realloc_strict.sh b/test/unit/zero_realloc_strict.sh deleted file mode 100644 index 314dcd0..0000000 --- a/test/unit/zero_realloc_strict.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="zero_realloc:strict" -- cgit v0.12 From 9a242f16d9e4a6afcd53782a9427471f6d144f1f Mon Sep 17 00:00:00 2001 From: cuishuang Date: Sun, 24 Apr 2022 23:32:44 +0800 Subject: fix some typos Signed-off-by: cuishuang --- bin/jeprof.in | 4 ++-- doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/jemalloc_internal_includes.h | 2 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/sc.h | 2 +- src/jemalloc.c | 2 +- src/tsd.c | 2 +- test/analyze/rand.c | 2 +- test/unit/sz.c | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index e0b212a..dbf6252 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -5085,7 +5085,7 @@ sub MapToSymbols { } else { # MapSymbolsWithNM tags each routine with its starting address, # useful in case the image has multiple occurrences of this - # routine. (It uses a syntax that resembles template paramters, + # routine. (It uses a syntax that resembles template parameters, # that are automatically stripped out by ShortFunctionName().) # addr2line does not provide the same information. So we check # if nm disambiguated our symbol, and if so take the annotated @@ -5437,7 +5437,7 @@ sub GetProcedureBoundaries { # "nm -f $image" is supposed to fail on GNU nm, but if: # # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND - # b. you have a.out in your current directory (a not uncommon occurence) + # b. you have a.out in your current directory (a not uncommon occurrence) # # then "nm -f $image" succeeds because -f only looks at the first letter of # the argument, which looks valid because it's [BbSsPp], and then since diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8c3703b..ce7acd9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1409,7 +1409,7 @@ malloc_conf = "xmalloc:true";]]> set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof. This prefix value can be overriden by + jeprof. This prefix value can be overridden by prof.prefix. 
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 7336e8b..1d51d41 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -104,7 +104,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit - * commit step (and writing to uncomitted memory is not + * commit step (and writing to uncommitted memory is not * allowed). */ return false; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 90a12a1..751c112 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -10,7 +10,7 @@ * structs, externs, and inlines), and included each header file multiple times * in this file, picking out the portion we want on each pass using the * following #defines: - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data * types. * JEMALLOC_H_STRUCTS : Data structures. * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3cf370c..4748a05 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -180,7 +180,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool *deferred_work_generated); /* * Frees the given edata back to the pa. Sets *generated_dirty if we produced - * new dirty pages (well, we alwyas set it for now; but this need not be the + * new dirty pages (well, we always set it for now; but this need not be the * case). * (We could make generated_dirty the return value of course, but this is more * consistent with the shrink pathway and our error codes here). diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 308985f..9bab347 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -348,7 +348,7 @@ size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. - * Otherwise, does its best to accomodate the request. + * Otherwise, does its best to accommodate the request. */ void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e5bd33..9c94425 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -327,7 +327,7 @@ a0dalloc(void *ptr) { } /* - * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-senstive + * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive * situations that cannot tolerate TLS variable access (TLS allocation and very * early internal data structure initialization). */ diff --git a/src/tsd.c b/src/tsd.c index b98c34b..e8e4f3a 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -209,7 +209,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { /* * This is the tricky case. We're transitioning from * one nominal state to another. The caller can't know - * about any races that are occuring at the same time, + * about any races that are occurring at the same time, * so we always have to recompute no matter what. 
*/ tsd_slow_update(tsd); diff --git a/test/analyze/rand.c b/test/analyze/rand.c index a4ab49a..bb20b06 100644 --- a/test/analyze/rand.c +++ b/test/analyze/rand.c @@ -34,7 +34,7 @@ * (c) Any generated number >= n_bucket * 2^lg_bucket_width will be counted * towards the last bucket; the expected mean and stddev provided should * also reflect that. - * (d) The number of iterations is adviced to be determined so that the bucket + * (d) The number of iterations is advised to be determined so that the bucket * with the minimal expected proportion gets a sufficient count. */ diff --git a/test/unit/sz.c b/test/unit/sz.c index be11aca..8ae04b9 100644 --- a/test/unit/sz.c +++ b/test/unit/sz.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_sz_psz2ind) { /* * Testing page size classes which reside prior to the regular group * with all size classes divisible by page size. - * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with correponding + * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with corresponding * pszind 0, 1, 2 and 3. */ for (size_t i = 0; i < SC_NGROUP; i++) { -- cgit v0.12 From 391bad4b95839e2c690879ca62b1e904a49a78df Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Apr 2022 16:31:33 -0700 Subject: Avoid abort() in test/integration/cpp/infallible_new_true. Allow setting the safety check abort hook through mallctl, which avoids abort() and core dumps. --- include/jemalloc/internal/safety_check.h | 5 +- src/ctl.c | 24 ++++++++ src/safety_check.c | 4 +- test/integration/cpp/infallible_new_true.cpp | 92 +++++++++++++++------------- 4 files changed, 79 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index f10c68e..f1a74f1 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -4,8 +4,11 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); + +typedef void (*safety_check_abort_hook_t)(const char *message); + /* Can set to NULL for a default. */ -void safety_check_set_abort(void (*abort_fn)(const char *)); +void safety_check_set_abort(safety_check_abort_hook_t abort_fn); JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { diff --git a/src/ctl.c b/src/ctl.c index 54d33ae..135271b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -14,6 +14,7 @@ #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -311,6 +312,7 @@ CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) +CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -849,6 +851,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)}, {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3437,6 +3440,27 @@ label_return: return ret; } +/* For integration test purpose only. 
No plan to move out of experimental. */ +static int +experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + WRITEONLY(); + if (newp != NULL) { + if (newlen != sizeof(safety_check_abort_hook_t)) { + ret = EINVAL; + goto label_return; + } + safety_check_abort_hook_t hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(hook, safety_check_abort_hook_t); + safety_check_set_abort(hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/safety_check.c b/src/safety_check.c index 552b312..209fdda 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -static void (*safety_check_abort)(const char *message); +static safety_check_abort_hook_t safety_check_abort; void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { @@ -15,7 +15,7 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, true_size, input_size, ptr, src); } -void safety_check_set_abort(void (*abort_fn)(const char *)) { +void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 9b943bd..d675412 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -1,55 +1,61 @@ #include +#include "test/jemalloc_test.h" + /* - * We can't test C++ in unit tests, and we can't change the safety check failure - * hook in integration tests. So we check that we *actually* abort on failure, - * by forking and checking the child process exit code. + * We can't test C++ in unit tests. In order to intercept abort, use a secret + * safety check abort hook in integration tests. */ +typedef void (*abort_hook_t)(const char *message); +bool fake_abort_called; +void fake_abort(const char *message) { + if (strcmp(message, ": Allocation failed and " + "opt.experimental_infallible_new is true. Aborting.\n") != 0) { + abort(); + } + fake_abort_called = true; +} -/* It's a unix system? */ -#ifdef __unix__ -/* I know this! */ -#include -#include -#include -static const bool can_fork = true; -#else -static const bool can_fork = false; -#endif +static bool +own_operator_new(void) { + uint64_t before, after; + size_t sz = sizeof(before); -#include "test/jemalloc_test.h" + /* thread.allocated is always available, even w/o config_stats. */ + expect_d_eq(mallctl("thread.allocated", (void *)&before, &sz, NULL, 0), + 0, "Unexpected mallctl failure reading stats"); + void *volatile ptr = ::operator new((size_t)8); + expect_ptr_not_null(ptr, "Unexpected allocation failure"); + expect_d_eq(mallctl("thread.allocated", (void *)&after, &sz, NULL, 0), + 0, "Unexpected mallctl failure reading stats"); + + return (after != before); +} TEST_BEGIN(test_failing_alloc) { - test_skip_if(!can_fork); -#ifdef __unix__ - pid_t pid = fork(); - expect_d_ne(pid, -1, "Unexpected fork failure"); - if (pid == 0) { - /* - * In the child, we'll print an error message to stderr before - * exiting. Close stderr to avoid spamming output for this - * expected failure. - */ - fclose(stderr); - try { - /* Too big of an allocation to succeed. 
*/ - void *volatile ptr = ::operator new((size_t)-1); - (void)ptr; - } catch (...) { - /* - * Swallow the exception; remember, we expect this to - * fail via an abort within new, not because an - * exception didn't get caught. - */ - } - } else { - int status; - pid_t err = waitpid(pid, &status, 0); - expect_d_ne(-1, err, "waitpid failure"); - expect_false(WIFEXITED(status), - "Should have seen an abnormal failure"); - } + abort_hook_t abort_hook = &fake_abort; + expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL, NULL, + (void *)&abort_hook, sizeof(abort_hook)), 0, + "Unexpected mallctl failure setting abort hook"); + + /* + * Not owning operator new is only expected to happen on MinGW which + * does not support operator new / delete replacement. + */ +#ifdef _WIN32 + test_skip_if(!own_operator_new()); +#else + expect_true(own_operator_new(), "No operator new overload"); #endif + void *volatile ptr = (void *)1; + try { + /* Too big of an allocation to succeed. */ + ptr = ::operator new((size_t)-1); + } catch (...) { + abort(); + } + expect_ptr_null(ptr, "Allocation should have failed"); + expect_b_eq(fake_abort_called, true, "Abort hook not invoked"); } TEST_END -- cgit v0.12 From ceca07d2ca95f7c2680263f3c679ba3f611d5ffb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 25 Apr 2022 14:17:52 -0700 Subject: Correct the name of stats.mutexes.prof_thds_data in doc. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ce7acd9..12dc571 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2733,7 +2733,7 @@ struct extent_hooks_s { counters. - + stats.mutexes.prof_thds_data.{counter} (counter specific type) r- -- cgit v0.12 From f5e840bbf0213d86ae3d0a915df8abd03d75cdf6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 27 Apr 2022 18:16:11 -0700 Subject: Minor typo fix in doc. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 12dc571..5c12f1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1345,7 +1345,7 @@ malloc_conf = "xmalloc:true";]]> maximum, size classes up to 8 MiB can be cached. The default maximum is 32 KiB (2^15). As a convenience, this may also be set by specifying lg_tcache_max, which will be taken to be the base-2 logarithm of the - setting of tcache_max + setting of tcache_max. -- cgit v0.12 From 254b011915c0c68549beb7a91be02cf56d81fa32 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Apr 2022 17:40:37 -0700 Subject: Small doc tweak of opt.trust_madvise. Avoid quoted enabled and disabled because it's a bool type instead of char *. --- doc/jemalloc.xml.in | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5c12f1d..fe4ded9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -972,10 +972,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", (bool) r- - Do not perform runtime check for MADV_DONTNEED, to - check that it actually zeros pages. The default is - disabled on linux and enabled elsewhere. - + If true, do not perform runtime check for MADV_DONTNEED, + to check that it actually zeros pages. The default is disabled on Linux + and enabled elsewhere. -- cgit v0.12 From a7d73dd4c9ba97bb033f7ae15f218a65d8b8ace6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 3 May 2022 14:28:30 -0700 Subject: Update TUNING.md to include the new tcache_max option. 
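As a rough illustration only (not part of the patch below): the low-footprint
recipe from TUNING.md can be applied through the MALLOC_CONF environment
variable or, assuming an unprefixed build where the public global is named
malloc_conf, at compile time. The exact values mirror the "low resource
consumption" example and are illustrative, not a recommendation:

    /* Hypothetical compile-time configuration; mirrors the "low resource
     * consumption" example in TUNING.md. */
    const char *malloc_conf =
        "narenas:1,tcache_max:1024,dirty_decay_ms:1000,muzzy_decay_ms:0";
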
--- TUNING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TUNING.md b/TUNING.md index 34fca05..e96399d 100644 --- a/TUNING.md +++ b/TUNING.md @@ -1,5 +1,5 @@ This document summarizes the common approaches for performance fine tuning with -jemalloc (as of 5.1.0). The default configuration of jemalloc tends to work +jemalloc (as of 5.3.0). The default configuration of jemalloc tends to work reasonably well in practice, and most applications should not have to tune any options. However, in order to cover a wide range of applications and avoid pathological cases, the default setting is sometimes kept conservative and @@ -76,14 +76,14 @@ Examples: * High resource consumption application, prioritizing memory usage: - `background_thread:true` combined with shorter decay time (decreased - `dirty_decay_ms` and / or `muzzy_decay_ms`, + `background_thread:true,tcache_max:4096` combined with shorter decay time + (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`, e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count (e.g. number of CPUs). * Low resource consumption application: - `narenas:1,lg_tcache_max:13` combined with shorter decay time (decreased + `narenas:1,tcache_max:1024` combined with shorter decay time (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`,e.g. `dirty_decay_ms:1000,muzzy_decay_ms:0`). -- cgit v0.12 From 66c889500a20e6493a6768de6eaa7347daf61483 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 May 2022 11:38:57 -0700 Subject: Make test/unit/background_thread_enable more conservative. To avoid resource exhaustion on 32-bit platforms. --- test/unit/background_thread_enable.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 46776f3..44034ac 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -2,12 +2,8 @@ const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20"; -TEST_BEGIN(test_deferred) { - test_skip_if(!have_background_thread); - - unsigned id; - size_t sz_u = sizeof(unsigned); - +static unsigned +max_test_narenas(void) { /* * 10 here is somewhat arbitrary, except insofar as we want to ensure * that the number of background threads is smaller than the number of @@ -15,7 +11,22 @@ TEST_BEGIN(test_deferred) { * cpu to handle background purging, so this is a conservative * approximation. */ - for (unsigned i = 0; i < 10 * ncpus; i++) { + unsigned ret = 10 * ncpus; + /* Limit the max to avoid VM exhaustion on 32-bit . */ + if (ret > 512) { + ret = 512; + } + + return ret; +} + +TEST_BEGIN(test_deferred) { + test_skip_if(!have_background_thread); + + unsigned id; + size_t sz_u = sizeof(unsigned); + + for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } @@ -50,7 +61,7 @@ TEST_BEGIN(test_max_background_threads) { unsigned id; size_t sz_u = sizeof(unsigned); - for (unsigned i = 0; i < 10 * ncpus; i++) { + for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } -- cgit v0.12 From 8cb814629acc7c7a8c1008f47e35d3f40129f5fa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 3 May 2022 15:41:43 -0700 Subject: Make the default option of zero realloc match the system allocator. 
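Illustrative sketch, not part of the patch below, of what the change means for
callers; it assumes a Linux or Windows build where the new zero_realloc:free
default applies:

    #include <stdlib.h>

    int main(void) {
        void *p = malloc(16);
        void *q = realloc(p, 0);
        /* zero_realloc:free (the new default on Linux and Windows): p has
         * been freed, q is NULL, and errno is left untouched. */
        /* zero_realloc:alloc (the previous default): q is a non-NULL
         * zero-size allocation, and p must no longer be used. */
        free(q); /* valid in both modes; free(NULL) is a no-op */
        return 0;
    }
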
--- configure.ac | 9 +++++++++ doc/jemalloc.xml.in | 15 ++++++++------- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/jemalloc.c | 7 ++++++- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 5c7a8ef..f6d25f3 100644 --- a/configure.ac +++ b/configure.ac @@ -638,6 +638,7 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_retain="0" +zero_realloc_default_free="0" maps_coalesce="1" DUMP_SYMS="${NM} -a" SYM_PREFIX="" @@ -684,6 +685,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -698,6 +700,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -773,6 +776,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-nto-qnx) abi="elf" @@ -1395,6 +1399,11 @@ if test "x$default_retain" = "x1" ; then AC_DEFINE([JEMALLOC_RETAIN], [ ], [ ]) fi +dnl Indicate whether realloc(ptr, 0) defaults to the "alloc" behavior. +if test "x$zero_realloc_default_free" = "x1" ; then + AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ]) +fi + dnl Enable allocation from DSS if supported by the OS. have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fe4ded9..e28e8f3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1578,13 +1578,14 @@ malloc_conf = "xmalloc:true";]]> r- Determines the behavior of - realloc() when passed a value of zero for the new - size. alloc treats this as an allocation of size zero - (and returns a non-null result except in case of resource exhaustion). - free treats this as a deallocation of the pointer, and - returns NULL without setting - errno. abort aborts the process if - zero is passed. The default is alloc. + realloc() when passed a value of zero for the new + size. alloc treats this as an allocation of size zero + (and returns a non-null result except in case of resource exhaustion). + free treats this as a deallocation of the pointer, and + returns NULL without setting + errno. abort aborts the process if + zero is passed. The default is free on Linux and + Windows, and alloc elsewhere. There is considerable divergence of behaviors across implementations in handling this case. Many have the behavior of diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0cb15d3..3588072 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -421,4 +421,7 @@ /* Darwin VM_MAKE_TAG support */ #undef JEMALLOC_HAVE_VM_MAKE_TAG +/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". 
*/ +#undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9c94425..7655de4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -112,7 +112,12 @@ bool opt_cache_oblivious = ; zero_realloc_action_t opt_zero_realloc_action = - zero_realloc_action_alloc; +#ifdef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + zero_realloc_action_free +#else + zero_realloc_action_alloc +#endif + ; atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -- cgit v0.12 From 304c919829f9f340669b61fa64867cfe5dba8021 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 27 Apr 2022 18:05:07 -0700 Subject: Update ChangeLog for 5.3.0. --- ChangeLog | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/ChangeLog b/ChangeLog index e55813b..32fde56 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,106 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.3.0 (May 6, 2022) + + This release contains many speed and space optimizations, from micro + optimizations on common paths to rework of internal data structures and + locking schemes, and many more too detailed to list below. Multiple percent + of system level metric improvements were measured in tested production + workloads. The release has gone through large-scale production testing. + + New features: + - Add the thread.idle mallctl which hints that the calling thread will be + idle for a nontrivial period of time. (@davidtgoldblatt) + - Allow small size classes to be the maximum size class to cache in the + thread-specific cache, through the opt.[lg_]tcache_max option. (@interwq, + @jordalgo) + - Make the behavior of realloc(ptr, 0) configurable with opt.zero_realloc. + (@davidtgoldblatt) + - Add 'make uninstall' support. (@sangshuduo, @Lapenkov) + - Support C++17 over-aligned allocation. (@marksantaniello) + - Add the thread.peak mallctl for approximate per-thread peak memory tracking. + (@davidtgoldblatt) + - Add interval-based stats output opt.stats_interval. (@interwq) + - Add prof.prefix to override filename prefixes for dumps. (@zhxchen17) + - Add high resolution timestamp support for profiling. (@tyroguru) + - Add the --collapsed flag to jeprof for flamegraph generation. + (@igorwwwwwwwwwwwwwwwwwwww) + - Add the --debug-syms-by-id option to jeprof for debug symbols discovery. + (@DeannaGelbart) + - Add the opt.prof_leak_error option to exit with error code when leak is + detected using opt.prof_final. (@yunxuo) + - Add opt.cache_oblivious as an runtime alternative to config.cache_oblivious. + (@interwq) + - Add mallctl interfaces: + + opt.zero_realloc (@davidtgoldblatt) + + opt.cache_oblivious (@interwq) + + opt.prof_leak_error (@yunxuo) + + opt.stats_interval (@interwq) + + opt.stats_interval_opts (@interwq) + + opt.tcache_max (@interwq) + + opt.trust_madvise (@azat) + + prof.prefix (@zhxchen17) + + stats.zero_reallocs (@davidtgoldblatt) + + thread.idle (@davidtgoldblatt) + + thread.peak.{read,reset} (@davidtgoldblatt) + + Bug fixes: + - Fix the synchronization around explicit tcache creation which could cause + invalid tcache identifiers. This regression was first released in 5.0.0. + (@yoshinorim, @davidtgoldblatt) + - Fix a profiling biasing issue which could cause incorrect heap usage and + object counts. This issue existed in all previous releases with the heap + profiling feature. 
(@davidtgoldblatt) + - Fix the order of stats counter updating on large realloc which could cause + failed assertions. This regression was first released in 5.0.0. (@azat) + - Fix the locking on the arena destroy mallctl, which could cause concurrent + arena creations to fail. This functionality was first introduced in 5.0.0. + (@interwq) + + Portability improvements: + - Remove nothrow from system function declarations on macOS and FreeBSD. + (@davidtgoldblatt, @fredemmott, @leres) + - Improve overcommit and page alignment settings on NetBSD. (@zoulasc) + - Improve CPU affinity support on BSD platforms. (@devnexen) + - Improve utrace detection and support. (@devnexen) + - Improve QEMU support with MADV_DONTNEED zeroed pages detection. (@azat) + - Add memcntl support on Solaris / illumos. (@devnexen) + - Improve CPU_SPINWAIT on ARM. (@AWSjswinney) + - Improve TSD cleanup on FreeBSD. (@Lapenkov) + - Disable percpu_arena if the CPU count cannot be reliably detected. (@azat) + - Add malloc_size(3) override support. (@devnexen) + - Add mmap VM_MAKE_TAG support. (@devnexen) + - Add support for MADV_[NO]CORE. (@devnexen) + - Add support for DragonFlyBSD. (@devnexen) + - Fix the QUANTUM setting on MIPS64. (@brooksdavis) + - Add the QUANTUM setting for ARC. (@vineetgarc) + - Add the QUANTUM setting for LoongArch. (@wangjl-uos) + - Add QNX support. (@jqian-aurora) + - Avoid atexit(3) calls unless the relevant profiling features are enabled. + (@BusyJay, @laiwei-rice, @interwq) + - Fix unknown option detection when using Clang. (@Lapenkov) + - Fix symbol conflict with musl libc. (@georgthegreat) + - Add -Wimplicit-fallthrough checks. (@nickdesaulniers) + - Add __forceinline support on MSVC. (@santagada) + - Improve FreeBSD and Windows CI support. (@Lapenkov) + - Add CI support for PPC64LE architecture. (@ezeeyahoo) + + Incompatible changes: + - Maximum size class allowed in tcache (opt.[lg_]tcache_max) now has an upper + bound of 8MiB. (@interwq) + + Optimizations and refactors (@davidtgoldblatt, @Lapenkov, @interwq): + - Optimize the common cases of the thread cache operations. + - Optimize internal data structures, including RB tree and pairing heap. + - Optimize the internal locking on extent management. + - Extract and refactor the internal page allocator and interface modules. + + Documentation: + - Fix doc build with --with-install-suffix. (@lawmurray, @interwq) + - Add PROFILING_INTERNALS.md. (@davidtgoldblatt) + - Ensure the proper order of doc building and installation. (@Mingli-Yu) + * 5.2.1 (August 5, 2019) This release is primarily about Windows. A critical virtual memory leak is -- cgit v0.12
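
A closing sketch (not part of any patch above) of how a test can drive the
experimental.hooks.safety_check_abort mallctl introduced earlier in this
series; the quiet_abort and set_safety_check_abort_hook helper names are made
up for illustration:

    #include "test/jemalloc_test.h"

    typedef void (*abort_hook_t)(const char *message);

    static void
    quiet_abort(const char *message) {
        /* Swallow the failure message instead of calling abort(). */
        (void)message;
    }

    static void
    set_safety_check_abort_hook(abort_hook_t hook) {
        /* Write-only mallctl; newlen must match the hook pointer size.
         * Passing a NULL hook restores the default abort behavior. */
        expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL,
            NULL, (void *)&hook, sizeof(hook)), 0,
            "Unexpected mallctl failure setting abort hook");
    }

Usage: call set_safety_check_abort_hook(&quiet_abort) before triggering an
expected safety-check failure, then set_safety_check_abort_hook(NULL) to
restore the default once the test is done.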