From aa44ddbcdd43cc8a8352b654f4a003d83b9c15b7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 2 Jul 2017 17:55:52 -0700 Subject: Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ee1b7ea..967d04d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,7 +22,7 @@ brevity. Much more detail can be found in the git revision history: unlikely to be an issue with other libc implementations. (@interwq) - Mask signals during background thread creation. This prevents signals from being inadvertently delivered to background threads. (@jasone, - @davidgoldblatt, @interwq) + @davidtgoldblatt, @interwq) - Avoid inactivity checks within background threads, in order to prevent recursive mutex acquisition. (@interwq) - Fix extent_grow_retained() to use the specified hooks when the -- cgit v0.12 From 0a4f5a7eea5e42292cea95fd30a88201c8d4a1ca Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Jul 2017 14:05:33 -0700 Subject: Fix deadlock in multithreaded fork in OS X. On OS X, we rely on the zone machinery to call our prefork and postfork handlers. In zone_force_unlock, we call jemalloc_postfork_child, reinitializing all our mutexes regardless of state, since the mutex implementation will assert if the tid of the unlocker is different from that of the locker. This has the effect of unlocking the mutexes, but also fails to wake any threads waiting on them in the parent. To fix this, we track whether or not we're the parent or child after the fork, and unlock or reinit as appropriate. This resolves #895. --- src/zone.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/zone.c b/src/zone.c index 9d3b7b4..23dfdd0 100644 --- a/src/zone.c +++ b/src/zone.c @@ -89,6 +89,7 @@ JEMALLOC_ATTR(weak_import); static malloc_zone_t *default_zone, *purgeable_zone; static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -270,6 +271,12 @@ zone_log(malloc_zone_t *zone, void *address) { static void zone_force_lock(malloc_zone_t *zone) { if (isthreaded) { + /* + * See the note in zone_force_unlock, below, to see why we need + * this. + */ + assert(zone_force_lock_pid == -1); + zone_force_lock_pid = getpid(); jemalloc_prefork(); } } @@ -277,14 +284,25 @@ zone_force_lock(malloc_zone_t *zone) { static void zone_force_unlock(malloc_zone_t *zone) { /* - * Call jemalloc_postfork_child() rather than - * jemalloc_postfork_parent(), because this function is executed by both - * parent and child. The parent can tolerate having state - * reinitialized, but the child cannot unlock mutexes that were locked - * by the parent. + * zone_force_lock and zone_force_unlock are the entry points to the + * forking machinery on OS X. The tricky thing is, the child is not + * allowed to unlock mutexes locked in the parent, even if owned by the + * forking thread (and the mutex type we use in OS X will fail an assert + * if we try). In the child, we can get away with reinitializing all + * the mutexes, which has the effect of unlocking them. In the parent, + * doing this would mean we wouldn't wake any waiters blocked on the + * mutexes we unlock. 
So, we record the pid of the current thread in + * zone_force_lock, and use that to detect if we're in the parent or + * child here, to decide which unlock logic we need. */ if (isthreaded) { - jemalloc_postfork_child(); + assert(zone_force_lock_pid != -1); + if (getpid() == zone_force_lock_pid) { + jemalloc_postfork_parent(); + } else { + jemalloc_postfork_child(); + } + zone_force_lock_pid = -1; } } -- cgit v0.12 From fb6787a78c3a1e3a4868520d0531fc2ebdda21d8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Jul 2017 13:19:31 -0700 Subject: Add a test of behavior under multi-threaded forking. Forking a multithreaded process is dangerous but allowed, so long as the child only executes async-signal-safe functions (e.g. exec). Add a test to ensure that we don't break this behavior. --- test/unit/fork.c | 108 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 87 insertions(+), 21 deletions(-) diff --git a/test/unit/fork.c b/test/unit/fork.c index afe2214..b169075 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -4,6 +4,30 @@ #include #endif +#ifndef _WIN32 +static void +wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} +#endif + TEST_BEGIN(test_fork) { #ifndef _WIN32 void *p; @@ -40,26 +64,67 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - int status; + wait_for_child_exit(pid); + } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif +} +TEST_END - /* Parent. */ - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure"); - } - if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail( - "Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } +#ifndef _WIN32 +static void * +do_fork_thd(void *arg) { + malloc(1); + int pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. */ + char *args[] = {"true", NULL}; + execvp(args[0], args); + test_fail("Exec failed"); + } else { + /* Parent */ + wait_for_child_exit(pid); + } + return NULL; +} +#endif + +#ifndef _WIN32 +static void +do_test_fork_multithreaded() { + thd_t child; + thd_create(&child, do_fork_thd, NULL); + do_fork_thd(NULL); + thd_join(child, NULL); +} +#endif + +TEST_BEGIN(test_fork_multithreaded) { +#ifndef _WIN32 + /* + * We've seen bugs involving hanging on arenas_lock (though the same + * class of bugs can happen on any mutex). The bugs are intermittent + * though, so we want to run the test multiple times. Since we hold the + * arenas lock only early in the process lifetime, we can't just run + * this test in a loop (since, after all the arenas are initialized, we + * won't acquire arenas_lock any further). We therefore repeat the test + * with multiple processes. + */ + for (int i = 0; i < 100; i++) { + int pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure,"); + } else if (pid == 0) { + /* Child. 
*/ + do_test_fork_multithreaded(); + _exit(0); + } else { + wait_for_child_exit(pid); } } #else @@ -70,6 +135,7 @@ TEST_END int main(void) { - return test( - test_fork); + return test_no_reentrancy( + test_fork, + test_fork_multithreaded); } -- cgit v0.12 From 0975b88dfd3a890f469c8c282a5140013af85ab2 Mon Sep 17 00:00:00 2001 From: "Y. T. Chung" Date: Thu, 20 Jul 2017 23:02:23 +0800 Subject: Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. Older Linux systems don't have O_CLOEXEC. If that's the case, we fcntl immediately after open, to minimize the length of the racy period in which an operation in another thread can leak a file descriptor to a child. --- src/pages.c | 27 ++++++++++++++++++++++----- src/prof.c | 6 ++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index fec64dd..0883647 100644 --- a/src/pages.c +++ b/src/pages.c @@ -353,14 +353,31 @@ os_overcommits_proc(void) { ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); + #else + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #endif + if (fd == -1) { return false; /* Error. */ } diff --git a/src/prof.c b/src/prof.c index 975722c..a1ca9e2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1409,7 +1409,13 @@ prof_open_maps(const char *format, ...) { va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); + +#if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); +#endif return mfd; } -- cgit v0.12 From 9761b449c8c6b70abdb4cfa953e59847a84af406 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 19 Jul 2017 16:36:46 -0700 Subject: Add a logging facility. This sets up a hierarchical logging facility, so that we can add logging statements liberally, and turn them on in a fine-grained manner. 
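As a rough usage sketch against the interface added below (the "arena.new"
name, the arena_new_hook() wrapper, and the unsigned ind argument are purely
illustrative and not defined by this patch; the snippet also assumes the
usual jemalloc internal preamble includes):

    #include "jemalloc/internal/log.h"

    static log_var_t log_arena_new = LOG_VAR_INIT("arena.new");

    static void
    arena_new_hook(unsigned ind) {
        /* Fires only when "arena.new" is selected by log_vars. */
        log(log_arena_new, "creating arena %u", ind);
    }

With --enable-log unset, config_log is false and the statement should compile
away entirely; with it set, the statement is emitted at run time only if the
log_vars malloc_conf option selects the name, e.g. something like
MALLOC_CONF="log_vars=arena.new" (a prefix segment such as "arena" or "."
also matches).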
--- Makefile.in | 2 + configure.ac | 16 ++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 6 + include/jemalloc/internal/jemalloc_preamble.h.in | 11 ++ include/jemalloc/internal/log.h | 89 ++++++++++ src/jemalloc.c | 12 ++ src/log.c | 78 +++++++++ test/unit/log.c | 182 +++++++++++++++++++++ 8 files changed, 396 insertions(+) create mode 100644 include/jemalloc/internal/log.h create mode 100644 src/log.c create mode 100644 test/unit/log.c diff --git a/Makefile.in b/Makefile.in index fec1397..6e3424f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,6 +102,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hash.c \ $(srcroot)src/hooks.c \ $(srcroot)src/large.c \ + $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ $(srcroot)src/mutex_pool.c \ @@ -171,6 +172,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ diff --git a/configure.ac b/configure.ac index 1551ded..0215154 100644 --- a/configure.ac +++ b/configure.ac @@ -1226,6 +1226,21 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) +dnl Do not log by default. +AC_ARG_ENABLE([log], + [AS_HELP_STRING([--enable-log], [Support debug logging])], +[if test "x$enable_log" = "xno" ; then + enable_log="0" +else + enable_log="1" +fi +], +[enable_log="0"] +) +if test "x$enable_log" = "x1" ; then + AC_DEFINE([JEMALLOC_LOG], [ ]) +fi +AC_SUBST([enable_log]) JE_COMPILABLE([a program using __builtin_unreachable], [ @@ -2188,6 +2203,7 @@ AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([log : ${enable_log}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c0f834f..b73daf0 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -238,6 +238,12 @@ #undef JEMALLOC_CACHE_OBLIVIOUS /* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +#undef JEMALLOC_LOG + +/* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ #undef JEMALLOC_ZONE diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 18539a0..099f98d 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -146,6 +146,17 @@ static const bool config_cache_oblivious = false #endif ; +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; #ifdef JEMALLOC_HAVE_SCHED_GETCPU /* Currently percpu_arena depends on sched_getcpu. 
*/ #define JEMALLOC_PERCPU_ARENA diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h new file mode 100644 index 0000000..8413a4d --- /dev/null +++ b/include/jemalloc/internal/log.h @@ -0,0 +1,89 @@ +#ifndef JEMALLOC_INTERNAL_LOG_H +#define JEMALLOC_INTERNAL_LOG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" + +#ifdef JEMALLOC_LOG +# define JEMALLOC_LOG_BUFSIZE 1000 +#else +# define JEMALLOC_LOG_BUFSIZE 1 +#endif + +/* + * The log_vars malloc_conf option is a '|'-delimited list of log_var name + * segments to log. The log_var names are themselves hierarchical, with '.' as + * the delimiter (a "segment" is just a prefix in the log namespace). So, if + * you have: + * + * static log_var_t log_arena = LOG_VAR_INIT("arena"); // 1 + * static log_var_t log_arena_a = LOG_VAR_INIT("arena.a"); // 2 + * static log_var_t log_arena_b = LOG_VAR_INIT("arena.b"); // 3 + * static log_var_t log_arena_a_a = LOG_VAR_INIT("arena.a.a"); // 4 + * static_log_var_t log_extent_a = LOG_VAR_INIT("extent.a"); // 5 + * static_log_var_t log_extent_b = LOG_VAR_INIT("extent.b"); // 6 + * + * And your malloc_conf option is "log_vars=arena.a|extent", then log_vars 2, 4, + * 5, and 6 will be enabled. You can enable logging from all log vars by + * writing "log_vars=.". + * + * You can then log by writing: + * log(log_var, "format string -- my int is %d", my_int); + * + * None of this should be regarded as a stable API for right now. It's intended + * as a debugging interface, to let us keep around some of our printf-debugging + * statements. + */ + +extern char log_var_names[JEMALLOC_LOG_BUFSIZE]; +extern atomic_b_t log_init_done; + +typedef struct log_var_s log_var_t; +struct log_var_s { + /* + * Lowest bit is "inited", second lowest is "enabled". Putting them in + * a single word lets us avoid any fences on weak architectures. + */ + atomic_u_t state; + const char *name; +}; + +#define LOG_NOT_INITIALIZED 0U +#define LOG_INITIALIZED_NOT_ENABLED 1U +#define LOG_ENABLED 2U + +#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} + +/* + * Returns the value we should assume for state (which is not necessarily + * accurate; if logging is done before logging has finished initializing, then + * we default to doing the safe thing by logging everything). + */ +unsigned log_var_update_state(log_var_t *log_var); + +/* We factor out the metadata management to allow us to test more easily. */ +#define log_do_begin(log_var) \ +if (config_log) { \ + unsigned log_state = atomic_load_u(&(log_var).state, \ + ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { + /* User code executes here. */ +#define log_do_end(log_var) \ + } \ + } \ +} + +#define log(log_var, format, ...) 
\ +do { \ + log_do_begin(log_var) \ + malloc_printf("%s: " format "\n", \ + (log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 0ee8ad4..09bac9e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -1173,6 +1174,16 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } + if (config_log) { + if (CONF_MATCH("log_vars")) { + size_t cpylen = ( + vlen <= sizeof(log_var_names) ? + vlen : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + continue; + } + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH @@ -1189,6 +1200,7 @@ malloc_conf_init(void) { #undef CONF_HANDLE_CHAR_P } } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } static bool diff --git a/src/log.c b/src/log.c new file mode 100644 index 0000000..022dc58 --- /dev/null +++ b/src/log.c @@ -0,0 +1,78 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/log.h" + +char log_var_names[JEMALLOC_LOG_BUFSIZE]; +atomic_b_t log_init_done = ATOMIC_INIT(false); + +/* + * Returns true if we were able to pick out a segment. Fills in r_segment_end + * with a pointer to the first character after the end of the string. + */ +static const char * +log_var_extract_segment(const char* segment_begin) { + const char *end; + for (end = segment_begin; *end != '\0' && *end != '|'; end++) { + } + return end; +} + +static bool +log_var_matches_segment(const char *segment_begin, const char *segment_end, + const char *log_var_begin, const char *log_var_end) { + assert(segment_begin <= segment_end); + assert(log_var_begin < log_var_end); + + ptrdiff_t segment_len = segment_end - segment_begin; + ptrdiff_t log_var_len = log_var_end - log_var_begin; + /* The special '.' segment matches everything. */ + if (segment_len == 1 && *segment_begin == '.') { + return true; + } + if (segment_len == log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0; + } else if (segment_len < log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0 + && log_var_begin[segment_len] == '.'; + } else { + return false; + } +} + +unsigned +log_var_update_state(log_var_t *log_var) { + const char *log_var_begin = log_var->name; + const char *log_var_end = log_var->name + strlen(log_var->name); + + /* Pointer to one before the beginning of the current segment. */ + const char *segment_begin = log_var_names; + + /* + * If log_init done is false, we haven't parsed the malloc conf yet. To + * avoid log-spew, we default to not displaying anything. 
+ */ + if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) { + return LOG_INITIALIZED_NOT_ENABLED; + } + + while (true) { + const char *segment_end = log_var_extract_segment( + segment_begin); + assert(segment_end < log_var_names + JEMALLOC_LOG_BUFSIZE); + if (log_var_matches_segment(segment_begin, segment_end, + log_var_begin, log_var_end)) { + atomic_store_u(&log_var->state, LOG_ENABLED, + ATOMIC_RELAXED); + return LOG_ENABLED; + } + if (*segment_end == '\0') { + /* Hit the end of the segment string with no match. */ + atomic_store_u(&log_var->state, + LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED); + return LOG_INITIALIZED_NOT_ENABLED; + } + /* Otherwise, skip the delimiter and continue. */ + segment_begin = segment_end + 1; + } +} diff --git a/test/unit/log.c b/test/unit/log.c new file mode 100644 index 0000000..6db256f --- /dev/null +++ b/test/unit/log.c @@ -0,0 +1,182 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/log.h" + +static void +expect_no_logging(const char *names) { + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l2 = LOG_VAR_INIT("l2"); + log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); + + strcpy(log_var_names, names); + + int count = 0; + + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l2) + count++; + log_do_end(log_l2) + + log_do_begin(log_l2_a) + count++; + log_do_end(log_l2_a) + } + assert_d_eq(count, 0, "Disabled logging not ignored!"); +} + +TEST_BEGIN(test_log_disabled) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + expect_no_logging(""); + expect_no_logging("abc"); + expect_no_logging("a.b.c"); + expect_no_logging("l12"); + expect_no_logging("l123|a456|b789"); + expect_no_logging("|||"); +} +TEST_END + +TEST_BEGIN(test_log_enabled_direct) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l1_a = LOG_VAR_INIT("l1.a"); + log_var_t log_l2 = LOG_VAR_INIT("l2"); + + int count; + + count = 0; + strcpy(log_var_names, "l1"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + } + assert_d_eq(count, 10, "Mis-logged!"); + + count = 0; + strcpy(log_var_names, "l1.a"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + } + assert_d_eq(count, 10, "Mis-logged!"); + + count = 0; + strcpy(log_var_names, "l1.a|abc|l2|def"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + + log_do_begin(log_l2) + count++; + log_do_end(log_l2) + } + assert_d_eq(count, 20, "Mis-logged!"); +} +TEST_END + +TEST_BEGIN(test_log_enabled_indirect) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + strcpy(log_var_names, "l0|l1|abc|l2.b|def"); + + /* On. */ + log_var_t log_l1 = LOG_VAR_INIT("l1"); + /* Off. */ + log_var_t log_l1a = LOG_VAR_INIT("l1a"); + /* On. */ + log_var_t log_l1_a = LOG_VAR_INIT("l1.a"); + /* Off. */ + log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); + /* On. */ + log_var_t log_l2_b_a = LOG_VAR_INIT("l2.b.a"); + /* On. */ + log_var_t log_l2_b_b = LOG_VAR_INIT("l2.b.b"); + + /* 4 are on total, so should sum to 40. 
*/ + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l1a) + count++; + log_do_end(log_l1a) + + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + + log_do_begin(log_l2_a) + count++; + log_do_end(log_l2_a) + + log_do_begin(log_l2_b_a) + count++; + log_do_end(log_l2_b_a) + + log_do_begin(log_l2_b_b) + count++; + log_do_end(log_l2_b_b) + } + + assert_d_eq(count, 40, "Mis-logged!"); +} +TEST_END + +TEST_BEGIN(test_log_enabled_global) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + strcpy(log_var_names, "abc|.|def"); + + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l2_a_a = LOG_VAR_INIT("l2.a.a"); + + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l2_a_a) + count++; + log_do_end(log_l2_a_a) + } + assert_d_eq(count, 20, "Mis-logged!"); +} +TEST_END + +TEST_BEGIN(test_logs_if_no_init) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, false, ATOMIC_RELAXED); + + log_var_t l = LOG_VAR_INIT("definitely.not.enabled"); + + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(l) + count++; + log_do_end(l) + } + assert_d_eq(count, 0, "Logging shouldn't happen if not initialized."); +} +TEST_END + +int +main(void) { + + return test( + test_log_disabled, + test_log_enabled_direct, + test_log_enabled_indirect, + test_log_enabled_global, + test_logs_if_no_init); +} -- cgit v0.12 From e215a7bc18a2c3263a6fcca37c1ec53af6c4babd Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 19 Jul 2017 18:05:28 -0700 Subject: Add entry and exit logging to all core functions. I.e. mallloc, free, the allocx API, the posix extensions. --- include/jemalloc/internal/log.h | 6 ++ src/jemalloc.c | 199 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 204 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 8413a4d..1df8cff 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -30,6 +30,12 @@ * You can then log by writing: * log(log_var, "format string -- my int is %d", my_int); * + * The namespaces currently in use: + * core.[malloc|free|posix_memalign|...].[entry|exit]: + * The entry/exit points of the functions publicly exposed by jemalloc. + * The "entry" variants try to log arguments to the functions, and the + * "exit" ones try to log return values. + * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging * statements. 
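(Illustrative note: with the namespace scheme documented above, a whole run's
entry/exit events can be captured by selecting the "core" prefix through the
log_vars option, for example by defining the malloc_conf global in the
application:

    const char *malloc_conf = "log_vars=core.malloc|core.free";

or just "log_vars=core" to match every core.* variable. The option name
log_vars and the malloc_conf global come from the existing conf machinery;
the particular selection string is only an example.)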
diff --git a/src/jemalloc.c b/src/jemalloc.c index 09bac9e..48a268f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1974,6 +1974,13 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_malloc_entry = LOG_VAR_INIT( + "core.malloc.entry"); + static log_var_t log_core_malloc_exit = LOG_VAR_INIT( + "core.malloc.exit"); + + log(log_core_malloc_entry, "size: %zu", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1988,6 +1995,8 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); + log(log_core_malloc_exit, "result: %p", ret); + return ret; } @@ -1998,6 +2007,14 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_posix_memalign_entry = LOG_VAR_INIT( + "core.posix_memalign.entry"); + static log_var_t log_core_posix_memalign_exit = LOG_VAR_INIT( + "core.posix_memalign.exit"); + + log(log_core_posix_memalign_entry, "mem ptr: %p, alignment: %zu, " + "size: %zu", memptr, alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2014,6 +2031,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + + log(log_core_posix_memalign_exit, "result: %d, alloc ptr: %p", ret, + *memptr); + return ret; } @@ -2026,6 +2047,14 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_aligned_alloc_entry = LOG_VAR_INIT( + "core.aligned_alloc.entry"); + static log_var_t log_core_aligned_alloc_exit = LOG_VAR_INIT( + "core.aligned_alloc.exit"); + + log(log_core_aligned_alloc_entry, "alignment: %zu, size: %zu\n", + alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2044,6 +2073,9 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + log(log_core_aligned_alloc_exit, "result: %p", ret); + return ret; } @@ -2055,6 +2087,13 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_calloc_entry = LOG_VAR_INIT( + "core.calloc.entry"); + static log_var_t log_core_calloc_exit = LOG_VAR_INIT( + "core.calloc.exit"); + + log(log_core_calloc_entry, "num: %zu, size: %zu\n", num, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2071,6 +2110,8 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); + log(log_core_calloc_exit, "result: %p", ret); + return ret; } @@ -2204,6 +2245,13 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + static log_var_t log_core_realloc_entry = LOG_VAR_INIT( + "core.realloc.entry"); + static log_var_t log_core_realloc_exit = LOG_VAR_INIT( + "core.realloc.exit"); + + log(log_core_realloc_entry, "ptr: %p, size: %zu\n", ptr, size); + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ @@ -2216,6 +2264,8 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } ifree(tsd, ptr, tcache, true); + + log(log_core_realloc_exit, "result: %p", NULL); return NULL; } size = 1; @@ -2248,7 +2298,9 @@ je_realloc(void *ptr, size_t size) { tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - return je_malloc(size); + void *ret = je_malloc(size); + log(log_core_realloc_exit, "result: %p", ret); + return ret; } if (unlikely(ret == NULL)) { @@ -2269,11 +2321,20 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); + + log(log_core_realloc_exit, "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { + static log_var_t log_core_free_entry = LOG_VAR_INIT( + "core.free.entry"); + static log_var_t log_core_free_exit = LOG_VAR_INIT( + "core.free.exit"); + + log(log_core_free_entry, "ptr: %p", ptr); + UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2303,6 +2364,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } + log(log_core_free_exit, "%s", ""); } /* @@ -2322,6 +2384,14 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_memalign_entry = LOG_VAR_INIT( + "core.memalign.entry"); + static log_var_t log_core_memalign_exit = LOG_VAR_INIT( + "core.memalign.exit"); + + log(log_core_memalign_entry, "alignment: %zu, size: %zu\n", alignment, + size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2339,6 +2409,8 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + log(log_core_memalign_exit, "result: %p", ret); return ret; } #endif @@ -2353,6 +2425,13 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_valloc_entry = LOG_VAR_INIT( + "core.valloc.entry"); + static log_var_t log_core_valloc_exit = LOG_VAR_INIT( + "core.valloc.exit"); + + log(log_core_valloc_entry, "size: %zu\n", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2371,6 +2450,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); + log(log_core_valloc_exit, "result: %p\n", ret); return ret; } #endif @@ -2444,6 +2524,13 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_mallocx_entry = LOG_VAR_INIT( + "core.mallocx.entry"); + static log_var_t log_core_mallocx_exit = LOG_VAR_INIT( + "core.mallocx.exit"); + + log(log_core_mallocx_entry, "size: %zu, flags: %d", size, flags); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2477,6 +2564,8 @@ je_mallocx(size_t size, int flags) { } imalloc(&sopts, &dopts); + + log(log_core_mallocx_exit, "result: %p", ret); return ret; } @@ -2557,6 +2646,15 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; + static log_var_t log_core_rallocx_entry = LOG_VAR_INIT( + "core.rallocx.entry"); + static log_var_t log_core_rallocx_exit = LOG_VAR_INIT( + "core.rallocx.exit"); + + log(log_core_rallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -2619,6 +2717,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_rallocx_exit, "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2627,6 +2727,8 @@ label_oom: } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_rallocx_exit, "result: %p", NULL); return NULL; } @@ -2713,6 +2815,15 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; + static log_var_t log_core_xallocx_entry = LOG_VAR_INIT( + "core.xallocx.entry"); + static 
log_var_t log_core_xallocx_exit = LOG_VAR_INIT( + "core.xallocx.exit"); + + log(log_core_xallocx_entry, "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", ptr, size, extra, flags); + + assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2762,6 +2873,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_xallocx_exit, "result: %zu", usize); return usize; } @@ -2771,6 +2884,13 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; + static log_var_t log_core_sallocx_entry = LOG_VAR_INIT( + "core.sallocx.entry"); + static log_var_t log_core_sallocx_exit = LOG_VAR_INIT( + "core.sallocx.exit"); + + log(log_core_sallocx_entry, "ptr: %p, flags: %d", ptr, flags); + assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2785,11 +2905,20 @@ je_sallocx(const void *ptr, int flags) { } check_entry_exit_locking(tsdn); + + log(log_core_sallocx_exit, "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { + static log_var_t log_core_dallocx_entry = LOG_VAR_INIT( + "core.dallocx.entry"); + static log_var_t log_core_dallocx_exit = LOG_VAR_INIT( + "core.dallocx.exit"); + + log(log_core_dallocx_entry, "ptr: %p, flags: %d", ptr, flags); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2827,6 +2956,8 @@ je_dallocx(void *ptr, int flags) { ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_dallocx_exit, "%s", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2848,6 +2979,14 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + static log_var_t log_core_sdallocx_entry = LOG_VAR_INIT( + "core.sdallocx.entry"); + static log_var_t log_core_sdallocx_exit = LOG_VAR_INIT( + "core.sdallocx.exit"); + + log(log_core_sdallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); @@ -2884,6 +3023,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_sdallocx_exit, "%s", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2892,9 +3033,17 @@ je_nallocx(size_t size, int flags) { size_t usize; tsdn_t *tsdn; + static log_var_t log_core_nallocx_entry = LOG_VAR_INIT( + "core.nallocx.entry"); + static log_var_t log_core_nallocx_exit = LOG_VAR_INIT( + "core.nallocx.exit"); + + log(log_core_nallocx_entry, "size: %zu, flags: %d", size, flags); + assert(size != 0); if (unlikely(malloc_init())) { + log(log_core_nallocx_exit, "result: %zu", ZU(0)); return 0; } @@ -2903,10 +3052,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { + log(log_core_nallocx_exit, "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); + log(log_core_nallocx_exit, "result: %zu", usize); return usize; } @@ -2916,7 +3067,15 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; + static log_var_t log_core_mallctl_entry = LOG_VAR_INIT( + "core.mallctl.entry"); + static log_var_t log_core_mallctl_exit = LOG_VAR_INIT( + "core.mallctl.exit"); + + log(log_core_mallctl_entry, "name: %s", name); + if (unlikely(malloc_init())) { + log(log_core_mallctl_exit, "result: %d", EAGAIN); return EAGAIN; } 
@@ -2924,6 +3083,8 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_mallctl_exit, "result: %d", ret); return ret; } @@ -2931,7 +3092,15 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; + static log_var_t log_core_mallctlnametomib_entry = LOG_VAR_INIT( + "core.mallctlnametomib.entry"); + static log_var_t log_core_mallctlnametomib_exit = LOG_VAR_INIT( + "core.mallctlnametomib.exit"); + + log(log_core_mallctlnametomib_entry, "name: %s", name); + if (unlikely(malloc_init())) { + log(log_core_mallctlnametomib_exit, "result: %d", EAGAIN); return EAGAIN; } @@ -2939,6 +3108,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_mallctlnametomib_exit, "result: %d", ret); return ret; } @@ -2948,7 +3119,16 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; + static log_var_t log_core_mallctlbymib_entry = LOG_VAR_INIT( + "core.mallctlbymib.entry"); + static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( + "core.mallctlbymib.exit"); + + log(log_core_mallctlbymib_entry, "%s", ""); + + if (unlikely(malloc_init())) { + log(log_core_mallctlbymib_exit, "result: %d", EAGAIN); return EAGAIN; } @@ -2956,6 +3136,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + log(log_core_mallctlbymib_exit, "result: %d", ret); return ret; } @@ -2964,10 +3145,18 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; + static log_var_t log_core_malloc_stats_print_entry = LOG_VAR_INIT( + "core.malloc_stats_print.entry"); + static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( + "core.malloc_stats_print.exit"); + + log(log_core_malloc_stats_print_entry, "%s", ""); + tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); + log(log_core_malloc_stats_print_exit, "%s", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2975,6 +3164,13 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; + static log_var_t log_core_malloc_usable_size_entry = LOG_VAR_INIT( + "core.malloc_usable_size.entry"); + static log_var_t log_core_malloc_usable_size_exit = LOG_VAR_INIT( + "core.malloc_usable_size.exit"); + + log(log_core_malloc_usable_size_entry, "ptr: %p", ptr); + assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); @@ -2992,6 +3188,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); + log(log_core_malloc_usable_size_exit, "result: %zu", ret); return ret; } -- cgit v0.12 From aa6c2821374f6dd6ed2e628c06bc08b0c4bc485c Mon Sep 17 00:00:00 2001 From: "Y. T. 
Chung" Date: Fri, 21 Jul 2017 21:40:29 +0800 Subject: Validates fd before calling fcntl --- src/pages.c | 12 +++++++++--- src/prof.c | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pages.c b/src/pages.c index 0883647..f8ef2bc 100644 --- a/src/pages.c +++ b/src/pages.c @@ -358,7 +358,9 @@ os_overcommits_proc(void) { O_CLOEXEC); #else fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) #if defined(O_CLOEXEC) @@ -367,14 +369,18 @@ os_overcommits_proc(void) { #else fd = (int)syscall(SYS_openat, AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #else #if defined(O_CLOEXEC) fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #endif diff --git a/src/prof.c b/src/prof.c index a1ca9e2..32760e6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1414,7 +1414,9 @@ prof_open_maps(const char *format, ...) { mfd = open(filename, O_RDONLY | O_CLOEXEC); #else mfd = open(filename, O_RDONLY); - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } #endif return mfd; -- cgit v0.12 From a9f7732d45c22ca7d22bed6ff2eaeb702356884e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 21 Jul 2017 13:34:45 -0700 Subject: Logging: allow logging with empty varargs. Currently, the log macro requires at least one argument after the format string, because of the way the preprocessor handles varargs macros. We can hide some of that irritation by pushing the extra arguments into a varargs function. --- configure.ac | 1 + .../jemalloc/internal/jemalloc_internal_macros.h | 3 ++ include/jemalloc/internal/log.h | 40 ++++++++++++++++++---- include/jemalloc/internal/malloc_io.h | 4 +++ src/jemalloc.c | 14 ++++---- src/log.c | 4 +-- test/unit/log.c | 16 +++++++-- 7 files changed, 65 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 0215154..ba0409a 100644 --- a/configure.ac +++ b/configure.ac @@ -243,6 +243,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wshorten-64-to-32]) JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) + JE_CFLAGS_ADD([-Wno-format-zero-length]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 4571895..ed75d37 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,4 +37,7 @@ # define JET_MUTABLE const #endif +#define JEMALLOC_VA_ARGS_HEAD(head, ...) head +#define JEMALLOC_VA_ARGS_TAIL(head, ...) 
__VA_ARGS__ + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 1df8cff..5ce8c35 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -2,14 +2,17 @@ #define JEMALLOC_INTERNAL_LOG_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #ifdef JEMALLOC_LOG -# define JEMALLOC_LOG_BUFSIZE 1000 +# define JEMALLOC_LOG_VAR_BUFSIZE 1000 #else -# define JEMALLOC_LOG_BUFSIZE 1 +# define JEMALLOC_LOG_VAR_BUFSIZE 1 #endif +#define JEMALLOC_LOG_BUFSIZE 4096 + /* * The log_vars malloc_conf option is a '|'-delimited list of log_var name * segments to log. The log_var names are themselves hierarchical, with '.' as @@ -41,7 +44,7 @@ * statements. */ -extern char log_var_names[JEMALLOC_LOG_BUFSIZE]; +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; extern atomic_b_t log_init_done; typedef struct log_var_s log_var_t; @@ -84,11 +87,36 @@ if (config_log) { \ } \ } -#define log(log_var, format, ...) \ +/* + * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during + * preprocessing. To work around this, we take all potential extra arguments in + * a var-args functions. Since a varargs macro needs at least one argument in + * the "...", we accept the format string there, and require that the first + * argument in this "..." is a const char *. + */ +static inline void +log_impl_varargs(const char *name, ...) { + char buf[JEMALLOC_LOG_BUFSIZE]; + va_list ap; + + va_start(ap, name); + const char *format = va_arg(ap, const char *); + size_t dst_offset = 0; + dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); + dst_offset += malloc_vsnprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + dst_offset += malloc_snprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + va_end(ap); + + malloc_write(buf); +} + +/* Call as log(log_var, "format_string %d", arg_for_format_string); */ +#define log(log_var, ...) \ do { \ log_do_begin(log_var) \ - malloc_printf("%s: " format "\n", \ - (log_var).name, __VA_ARGS__); \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ log_do_end(log_var) \ } while (0) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 47ae58e..4992d1d 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -53,6 +53,10 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +/* + * The caller can set write_cb and cbopaque to null to choose to print with the + * je_malloc_message hook. 
+ */ void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, diff --git a/src/jemalloc.c b/src/jemalloc.c index 48a268f..1dc6682 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2364,7 +2364,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log(log_core_free_exit, "%s", ""); + log(log_core_free_exit, ""); } /* @@ -2957,7 +2957,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_dallocx_exit, "%s", ""); + log(log_core_dallocx_exit, ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -3024,7 +3024,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_sdallocx_exit, "%s", ""); + log(log_core_sdallocx_exit, ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3083,7 +3083,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - + log(log_core_mallctl_exit, "result: %d", ret); return ret; } @@ -3124,7 +3124,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( "core.mallctlbymib.exit"); - log(log_core_mallctlbymib_entry, "%s", ""); + log(log_core_mallctlbymib_entry, ""); if (unlikely(malloc_init())) { @@ -3150,13 +3150,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( "core.malloc_stats_print.exit"); - log(log_core_malloc_stats_print_entry, "%s", ""); + log(log_core_malloc_stats_print_entry, ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); - log(log_core_malloc_stats_print_exit, "%s", ""); + log(log_core_malloc_stats_print_exit, ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW diff --git a/src/log.c b/src/log.c index 022dc58..778902f 100644 --- a/src/log.c +++ b/src/log.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/log.h" -char log_var_names[JEMALLOC_LOG_BUFSIZE]; +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; atomic_b_t log_init_done = ATOMIC_INIT(false); /* @@ -59,7 +59,7 @@ log_var_update_state(log_var_t *log_var) { while (true) { const char *segment_end = log_var_extract_segment( segment_begin); - assert(segment_end < log_var_names + JEMALLOC_LOG_BUFSIZE); + assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); if (log_var_matches_segment(segment_begin, segment_end, log_var_begin, log_var_end)) { atomic_store_u(&log_var->state, LOG_ENABLED, diff --git a/test/unit/log.c b/test/unit/log.c index 6db256f..053fea4 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -170,13 +170,25 @@ TEST_BEGIN(test_logs_if_no_init) { } TEST_END +/* + * This really just checks to make sure that this usage compiles; we don't have + * any test code to run. 
+ */ +TEST_BEGIN(test_log_only_format_string) { + if (false) { + static log_var_t l = LOG_VAR_INIT("str"); + log(l, "No arguments follow this format string."); + } +} +TEST_END + int main(void) { - return test( test_log_disabled, test_log_enabled_direct, test_log_enabled_indirect, test_log_enabled_global, - test_logs_if_no_init); + test_logs_if_no_init, + test_log_only_format_string); } -- cgit v0.12 From b28f31e7ed6c987bdbf3bdd9ce4aa63245926b4d Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 24 Jul 2017 11:59:29 -0700 Subject: Split out cold code path in newImpl I noticed that the whole newImpl is inlined. Since OOM handling code is rarely executed, we should only inline the hot path. --- src/jemalloc_cpp.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 844ab39..f0cedda 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -39,12 +39,10 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif -template -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) - return ptr; +JEMALLOC_NOINLINE +static void * +handleOOM(std::size_t size, bool nothrow) { + void *ptr = nullptr; while (ptr == nullptr) { std::new_handler handler; @@ -68,11 +66,22 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { ptr = je_malloc(size); } - if (ptr == nullptr && !IsNoExcept) + if (ptr == nullptr && !nothrow) std::__throw_bad_alloc(); return ptr; } +template +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = je_malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + return handleOOM(size, IsNoExcept); +} + void * operator new(std::size_t size) { return newImpl(size); -- cgit v0.12 From e6aeceb6068ace14ca530506fdfeb5f1cadd9a19 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Jul 2017 12:29:28 -0700 Subject: Logging: log using the log var names directly. Currently we have to log by writing something like: static log_var_t log_a_b_c = LOG_VAR_INIT("a.b.c"); log (log_a_b_c, "msg"); This is sort of annoying. Let's just write: log("a.b.c", "msg"); --- include/jemalloc/internal/log.h | 5 +- src/jemalloc.c | 198 ++++++++++------------------------------ test/unit/log.c | 3 +- 3 files changed, 51 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 5ce8c35..9f32fb4 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -112,9 +112,10 @@ log_impl_varargs(const char *name, ...) { malloc_write(buf); } -/* Call as log(log_var, "format_string %d", arg_for_format_string); */ -#define log(log_var, ...) \ +/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ +#define log(log_var_str, ...) 
\ do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ log_do_begin(log_var) \ log_impl_varargs((log_var).name, __VA_ARGS__); \ log_do_end(log_var) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 1dc6682..ed47052 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1974,12 +1974,7 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_malloc_entry = LOG_VAR_INIT( - "core.malloc.entry"); - static log_var_t log_core_malloc_exit = LOG_VAR_INIT( - "core.malloc.exit"); - - log(log_core_malloc_entry, "size: %zu", size); + log("core.malloc.entry", "size: %zu", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1995,7 +1990,7 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); - log(log_core_malloc_exit, "result: %p", ret); + log("core.malloc.exit", "result: %p", ret); return ret; } @@ -2007,12 +2002,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_posix_memalign_entry = LOG_VAR_INIT( - "core.posix_memalign.entry"); - static log_var_t log_core_posix_memalign_exit = LOG_VAR_INIT( - "core.posix_memalign.exit"); - - log(log_core_posix_memalign_entry, "mem ptr: %p, alignment: %zu, " + log("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " "size: %zu", memptr, alignment, size); static_opts_init(&sopts); @@ -2032,7 +2022,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); - log(log_core_posix_memalign_exit, "result: %d, alloc ptr: %p", ret, + log("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); return ret; @@ -2047,12 +2037,7 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_aligned_alloc_entry = LOG_VAR_INIT( - "core.aligned_alloc.entry"); - static log_var_t log_core_aligned_alloc_exit = LOG_VAR_INIT( - "core.aligned_alloc.exit"); - - log(log_core_aligned_alloc_entry, "alignment: %zu, size: %zu\n", + log("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2074,7 +2059,7 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log(log_core_aligned_alloc_exit, "result: %p", ret); + log("core.aligned_alloc.exit", "result: %p", ret); return ret; } @@ -2087,12 +2072,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_calloc_entry = LOG_VAR_INIT( - "core.calloc.entry"); - static log_var_t log_core_calloc_exit = LOG_VAR_INIT( - "core.calloc.exit"); - - log(log_core_calloc_entry, "num: %zu, size: %zu\n", num, size); + log("core.calloc.entry", "num: %zu, size: %zu\n", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2110,7 +2090,7 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); - log(log_core_calloc_exit, "result: %p", ret); + log("core.calloc.exit", "result: %p", ret); return ret; } @@ -2245,12 +2225,7 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; - static log_var_t log_core_realloc_entry = LOG_VAR_INIT( - "core.realloc.entry"); - static log_var_t log_core_realloc_exit = LOG_VAR_INIT( - "core.realloc.exit"); - - log(log_core_realloc_entry, "ptr: %p, size: %zu\n", ptr, size); + log("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -2265,7 +2240,7 @@ je_realloc(void *ptr, size_t size) { } ifree(tsd, ptr, 
tcache, true); - log(log_core_realloc_exit, "result: %p", NULL); + log("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2299,7 +2274,7 @@ je_realloc(void *ptr, size_t size) { } else { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret = je_malloc(size); - log(log_core_realloc_exit, "result: %p", ret); + log("core.realloc.exit", "result: %p", ret); return ret; } @@ -2322,18 +2297,13 @@ je_realloc(void *ptr, size_t size) { UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); - log(log_core_realloc_exit, "result: %p", ret); + log("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { - static log_var_t log_core_free_entry = LOG_VAR_INIT( - "core.free.entry"); - static log_var_t log_core_free_exit = LOG_VAR_INIT( - "core.free.exit"); - - log(log_core_free_entry, "ptr: %p", ptr); + log("core.free.entry", "ptr: %p", ptr); UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { @@ -2364,7 +2334,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log(log_core_free_exit, ""); + log("core.free.exit", ""); } /* @@ -2384,12 +2354,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_memalign_entry = LOG_VAR_INIT( - "core.memalign.entry"); - static log_var_t log_core_memalign_exit = LOG_VAR_INIT( - "core.memalign.exit"); - - log(log_core_memalign_entry, "alignment: %zu, size: %zu\n", alignment, + log("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2410,7 +2375,7 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log(log_core_memalign_exit, "result: %p", ret); + log("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2425,12 +2390,7 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_valloc_entry = LOG_VAR_INIT( - "core.valloc.entry"); - static log_var_t log_core_valloc_exit = LOG_VAR_INIT( - "core.valloc.exit"); - - log(log_core_valloc_entry, "size: %zu\n", size); + log("core.valloc.entry", "size: %zu\n", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2450,7 +2410,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); - log(log_core_valloc_exit, "result: %p\n", ret); + log("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2524,12 +2484,7 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_mallocx_entry = LOG_VAR_INIT( - "core.mallocx.entry"); - static log_var_t log_core_mallocx_exit = LOG_VAR_INIT( - "core.mallocx.exit"); - - log(log_core_mallocx_entry, "size: %zu, flags: %d", size, flags); + log("core.mallocx.entry", "size: %zu, flags: %d", size, flags); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2565,7 +2520,7 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); - log(log_core_mallocx_exit, "result: %p", ret); + log("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2646,12 +2601,7 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - static log_var_t log_core_rallocx_entry = LOG_VAR_INIT( - "core.rallocx.entry"); - static log_var_t log_core_rallocx_exit = LOG_VAR_INIT( - "core.rallocx.exit"); - - log(log_core_rallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + log("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); @@ -2718,7 +2668,7 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); 
check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_rallocx_exit, "result: %p", p); + log("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2728,7 +2678,7 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_rallocx_exit, "result: %p", NULL); + log("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2815,15 +2765,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - static log_var_t log_core_xallocx_entry = LOG_VAR_INIT( - "core.xallocx.entry"); - static log_var_t log_core_xallocx_exit = LOG_VAR_INIT( - "core.xallocx.exit"); - - log(log_core_xallocx_entry, "ptr: %p, size: %zu, extra: %zu, " + log("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); - assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2874,7 +2818,7 @@ label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_xallocx_exit, "result: %zu", usize); + log("core.xallocx.exit", "result: %zu", usize); return usize; } @@ -2884,12 +2828,7 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; - static log_var_t log_core_sallocx_entry = LOG_VAR_INIT( - "core.sallocx.entry"); - static log_var_t log_core_sallocx_exit = LOG_VAR_INIT( - "core.sallocx.exit"); - - log(log_core_sallocx_entry, "ptr: %p, flags: %d", ptr, flags); + log("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2906,18 +2845,13 @@ je_sallocx(const void *ptr, int flags) { check_entry_exit_locking(tsdn); - log(log_core_sallocx_exit, "result: %zu", usize); + log("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { - static log_var_t log_core_dallocx_entry = LOG_VAR_INIT( - "core.dallocx.entry"); - static log_var_t log_core_dallocx_exit = LOG_VAR_INIT( - "core.dallocx.exit"); - - log(log_core_dallocx_entry, "ptr: %p, flags: %d", ptr, flags); + log("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2957,7 +2891,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_dallocx_exit, ""); + log("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2979,12 +2913,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - static log_var_t log_core_sdallocx_entry = LOG_VAR_INIT( - "core.sdallocx.entry"); - static log_var_t log_core_sdallocx_exit = LOG_VAR_INIT( - "core.sdallocx.exit"); - - log(log_core_sdallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + log("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); tsd_t *tsd = tsd_fetch(); @@ -3024,7 +2953,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_sdallocx_exit, ""); + log("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3033,17 +2962,10 @@ je_nallocx(size_t size, int flags) { size_t usize; tsdn_t *tsdn; - static log_var_t log_core_nallocx_entry = LOG_VAR_INIT( - "core.nallocx.entry"); - static log_var_t log_core_nallocx_exit = LOG_VAR_INIT( - "core.nallocx.exit"); - - log(log_core_nallocx_entry, "size: %zu, flags: %d", size, flags); - 
assert(size != 0); if (unlikely(malloc_init())) { - log(log_core_nallocx_exit, "result: %zu", ZU(0)); + log("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -3052,12 +2974,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { - log(log_core_nallocx_exit, "result: %zu", ZU(0)); + log("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); - log(log_core_nallocx_exit, "result: %zu", usize); + log("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -3067,15 +2989,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; - static log_var_t log_core_mallctl_entry = LOG_VAR_INIT( - "core.mallctl.entry"); - static log_var_t log_core_mallctl_exit = LOG_VAR_INIT( - "core.mallctl.exit"); - - log(log_core_mallctl_entry, "name: %s", name); + log("core.mallctl.entry", "name: %s", name); if (unlikely(malloc_init())) { - log(log_core_mallctl_exit, "result: %d", EAGAIN); + log("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3084,7 +3001,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctl_exit, "result: %d", ret); + log("core.mallctl.exit", "result: %d", ret); return ret; } @@ -3092,15 +3009,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - static log_var_t log_core_mallctlnametomib_entry = LOG_VAR_INIT( - "core.mallctlnametomib.entry"); - static log_var_t log_core_mallctlnametomib_exit = LOG_VAR_INIT( - "core.mallctlnametomib.exit"); - - log(log_core_mallctlnametomib_entry, "name: %s", name); + log("core.mallctlnametomib.entry", "name: %s", name); if (unlikely(malloc_init())) { - log(log_core_mallctlnametomib_exit, "result: %d", EAGAIN); + log("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3109,7 +3021,7 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctlnametomib_exit, "result: %d", ret); + log("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -3119,16 +3031,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; - static log_var_t log_core_mallctlbymib_entry = LOG_VAR_INIT( - "core.mallctlbymib.entry"); - static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( - "core.mallctlbymib.exit"); - - log(log_core_mallctlbymib_entry, ""); - + log("core.mallctlbymib.entry", ""); if (unlikely(malloc_init())) { - log(log_core_mallctlbymib_exit, "result: %d", EAGAIN); + log("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3136,7 +3042,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctlbymib_exit, "result: %d", ret); + log("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -3145,18 +3051,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; - static log_var_t log_core_malloc_stats_print_entry = LOG_VAR_INIT( - "core.malloc_stats_print.entry"); - static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( 
- "core.malloc_stats_print.exit"); - - log(log_core_malloc_stats_print_entry, ""); + log("core.malloc_stats_print.entry", ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); - log(log_core_malloc_stats_print_exit, ""); + log("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3164,12 +3065,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; - static log_var_t log_core_malloc_usable_size_entry = LOG_VAR_INIT( - "core.malloc_usable_size.entry"); - static log_var_t log_core_malloc_usable_size_exit = LOG_VAR_INIT( - "core.malloc_usable_size.exit"); - - log(log_core_malloc_usable_size_entry, "ptr: %p", ptr); + log("core.malloc_usable_size.entry", "ptr: %p", ptr); assert(malloc_initialized() || IS_INITIALIZER); @@ -3188,7 +3084,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); - log(log_core_malloc_usable_size_exit, "result: %zu", ret); + log("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/test/unit/log.c b/test/unit/log.c index 053fea4..3c1a208 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -176,8 +176,7 @@ TEST_END */ TEST_BEGIN(test_log_only_format_string) { if (false) { - static log_var_t l = LOG_VAR_INIT("str"); - log(l, "No arguments follow this format string."); + log("log_str", "No arguments follow this format string."); } } TEST_END -- cgit v0.12 From 7c22ea7a93f16c90f49de8ee226e3bcd1521c93e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Jul 2017 12:56:02 -0700 Subject: Only run test/integration/sdallocx non-reentrantly. This is a temporary workaround until we add some beefier CI machines. Right now, we're seeing too many OOMs for this to be useful. --- test/integration/sdallocx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index e7ea1d8..ca01448 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -49,7 +49,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_basic, test_alignment_and_size); } -- cgit v0.12 From 2d2fa72647e0e535088793a0335d0294277d2f09 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 28 Jul 2017 13:01:15 -0700 Subject: Filter out "newImpl" from profiling output. --- bin/jeprof.in | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index e6f4af4..1bbc51e 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2895,6 +2895,7 @@ sub RemoveUninterestingFrames { foreach my $name ('@JEMALLOC_PREFIX@calloc', 'cfree', '@JEMALLOC_PREFIX@malloc', + 'newImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', -- cgit v0.12 From 3800e55a2c6f4ffb03242db06437ad371db4ccd8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 31 Jul 2017 13:01:07 -0700 Subject: Bypass extent_alloc_wrapper_hard for no_move_expand. When retain is enabled, we should not attempt mmap for in-place expansion (large_ralloc_no_move), because it's virtually impossible to succeed, and causes unnecessary syscalls (which can cause lock contention under load). 
--- src/extent.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/extent.c b/src/extent.c index fa45c84..f464de4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1296,6 +1296,15 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). + */ + return NULL; + } extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } -- cgit v0.12 From 9a39b23c9c823e8157e2e6850014fa67c09f9351 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 31 Jul 2017 15:17:57 -0700 Subject: Remove a redundant '--with-malloc-conf=tcache:false' from gen_run_tests.py This is already tested via its inclusion in possible_malloc_conf_opts. --- scripts/gen_run_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index ddf2153..bf19c2c 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -22,7 +22,6 @@ possible_config_opts = [ '--enable-debug', '--enable-prof', '--disable-stats', - '--with-malloc-conf=tcache:false', ] possible_malloc_conf_opts = [ 'tcache:false', -- cgit v0.12 From 1ab2ab294c8f29a6f314f3ff30fbf4cdb2f01af6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 31 Jul 2017 14:35:33 -0700 Subject: Only read szind if ptr is not paged aligned in sdallocx. If ptr is not page aligned, we know the allocation was not sampled. In this case use the size passed into sdallocx directly w/o accessing rtree. This improve sdallocx efficiency in the common case (not sampled && small allocation). --- src/jemalloc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ed47052..4c73ba4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2194,17 +2194,37 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (config_prof && opt_prof) { + if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { + /* + * When cache_oblivious is disabled and ptr is not page aligned, + * the allocation was not sampled -- usize can be used to + * determine szind directly. 
+ */ + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = true; + ctx = &alloc_ctx; + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); + } + } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; - prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } + if (config_prof && opt_prof) { + prof_free(tsd, ptr, usize, ctx); + } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } -- cgit v0.12 From 048c6679cd0ef1500d0609dce48fcd823d15d93b Mon Sep 17 00:00:00 2001 From: Ryan Libby Date: Mon, 7 Aug 2017 22:00:22 -0700 Subject: Remove external linkage for spin_adaptive The external linkage for spin_adaptive was not used, and the inline declaration of spin_adaptive that was used caused a probem on FreeBSD where CPU_SPINWAIT is implemented as a call to a static procedure for x86 architectures. --- Makefile.in | 1 - include/jemalloc/internal/spin.h | 8 +------- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 --- src/spin.c | 4 ---- 5 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 src/spin.c diff --git a/Makefile.in b/Makefile.in index 6e3424f..0698633 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,7 +112,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ - $(srcroot)src/spin.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index e2afc98..aded0fc 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -1,19 +1,13 @@ #ifndef JEMALLOC_INTERNAL_SPIN_H #define JEMALLOC_INTERNAL_SPIN_H -#ifdef JEMALLOC_SPIN_C_ -# define SPIN_INLINE extern inline -#else -# define SPIN_INLINE inline -#endif - #define SPIN_INITIALIZER {0U} typedef struct { unsigned iteration; } spin_t; -SPIN_INLINE void +static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2addd29..97f892e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -56,7 +56,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 4edf09b..d2de135 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,9 +70,6 @@ Source Files - - Source Files - Source Files diff --git a/src/spin.c b/src/spin.c deleted file mode 100644 index 24372c2..0000000 --- a/src/spin.c +++ /dev/null @@ -1,4 +0,0 @@ -#define JEMALLOC_SPIN_C_ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include "jemalloc/internal/spin.h" -- cgit v0.12 From d157864027562dc17475edfd1bc6dce559b7ac4b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Aug 2017 16:35:43 -0700 Subject: Filter out "void *newImpl" in prof output. 
--- bin/jeprof.in | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 1bbc51e..588c6b4 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2896,6 +2896,7 @@ sub RemoveUninterestingFrames { 'cfree', '@JEMALLOC_PREFIX@malloc', 'newImpl', + 'void* newImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', -- cgit v0.12 From 8fdd9a579779b84d6af27f94c295f82a4df8e5be Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 10 Aug 2017 13:14:26 -0700 Subject: Implement opt.metadata_thp This option enables transparent huge page for base allocators (require MADV_HUGEPAGE support). --- configure.ac | 3 ++ doc/jemalloc.xml.in | 17 ++++++- include/jemalloc/internal/base_externs.h | 2 + include/jemalloc/internal/base_types.h | 2 + .../jemalloc/internal/jemalloc_internal_defs.h.in | 6 +++ include/jemalloc/internal/pages.h | 3 ++ src/base.c | 43 ++++++++++++------ src/ctl.c | 3 ++ src/jemalloc.c | 1 + src/pages.c | 53 +++++++++++++++++++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + 12 files changed, 118 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index ba0409a..e1a7343 100644 --- a/configure.ac +++ b/configure.ac @@ -1824,6 +1824,9 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) + fi fi dnl Enable transparent huge page support by default. diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 21e401a..f1712f0 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -916,6 +916,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.metadata_thp + (bool) + r- + + If true, allow jemalloc to use transparent huge page + (THP) for internal metadata (see stats.metadata for details). This + option is disabled by default. + + opt.retain @@ -2187,7 +2199,10 @@ struct extent_hooks_s { metadata structures (see stats.arenas.<i>.base) and internal allocations (see stats.arenas.<i>.internal). + linkend="stats.arenas.i.internal">stats.arenas.<i>.internal). + Transparent huge page (enabled with opt.metadata_thp) usage is not + considered. 
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index a4fd5ac..a5cb8a8 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H +extern bool opt_metadata_thp; + base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index be7ee82..6e71033 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -4,4 +4,6 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; +#define METADATA_THP_DEFAULT false + #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b73daf0..5fa7f51 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -261,6 +261,12 @@ #undef JEMALLOC_HAVE_MADVISE /* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_HUGE + +/* * Methods for purging unused pages differ between operating systems. * * madvise(..., MADV_FREE) : This marks pages as being unused, such that they diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 28383b7..121fff3 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,6 +58,9 @@ static const bool pages_can_purge_forced = #endif ; +/* Whether transparent huge page state is "madvise". */ +extern bool thp_state_madvise; + void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); bool pages_commit(void *addr, size_t size); diff --git a/src/base.c b/src/base.c index 97078b1..9925978 100644 --- a/src/base.c +++ b/src/base.c @@ -10,7 +10,9 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +bool opt_metadata_thp = METADATA_THP_DEFAULT; /******************************************************************************/ @@ -20,19 +22,26 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) bool zero = true; bool commit = true; + /* We use hugepage sizes regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } + if (addr != NULL && opt_metadata_thp && thp_state_madvise) { + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_huge(addr, size); + } + return addr; } @@ -51,16 +60,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +79,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (opt_metadata_thp && thp_state_madvise) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } diff --git a/src/ctl.c b/src/ctl.c index 36bc8fb..c299103 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,6 +80,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) @@ -274,6 +275,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -1568,6 +1570,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c73ba4..cbae259 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1055,6 +1055,7 @@ malloc_conf_init(void) { if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } + CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp") CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; diff --git a/src/pages.c b/src/pages.c index f8ef2bc..9561f6d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -25,6 +25,8 @@ static int mmap_flags; #endif static bool os_overcommits; +bool thp_state_madvise; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -291,7 +293,7 @@ pages_huge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return true; @@ -303,7 +305,7 @@ pages_nohuge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return false; @@ -413,6 +415,51 @@ os_overcommits_proc(void) { } #endif +static void +init_thp_state(void) { +#ifndef JEMALLOC_HAVE_MADVISE_HUGE + if (opt_metadata_thp && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; +#endif + static const char madvise_state[] = "always [madvise] never\n"; + char buf[sizeof(madvise_state)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + ssize_t nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + if (strncmp(buf, madvise_state, (size_t)nread) == 0) { + thp_state_madvise = true; + return; + } +label_error: + thp_state_madvise = false; +} + bool pages_boot(void) { 
os_page = os_page_detect(); @@ -441,5 +488,7 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + return false; } diff --git a/src/stats.c b/src/stats.c index 087df76..746cc42 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,6 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort_conf, ",") + OPT_WRITE_BOOL(metadata_thp, ",") OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f611654..d9fdd05 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); -- cgit v0.12 From 3ec279ba1c702286b2a7d4ce7aaf48d7905f1c5b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 Aug 2017 15:41:52 -0700 Subject: Fix test/unit/pages. As part of the metadata_thp support, We now have a separate swtich (JEMALLOC_HAVE_MADVISE_HUGE) for MADV_HUGEPAGE availability. Use that instead of JEMALLOC_THP (which doesn't guard pages_huge anymore) in tests. --- include/jemalloc/internal/jemalloc_preamble.h.in | 7 +++++++ src/pages.c | 13 +++++++------ test/unit/pages.c | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 099f98d..f6ed731 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -61,6 +61,13 @@ static const bool have_dss = false #endif ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true diff --git a/src/pages.c b/src/pages.c index 9561f6d..70f1fd3 100644 --- a/src/pages.c +++ b/src/pages.c @@ -417,13 +417,14 @@ os_overcommits_proc(void) { static void init_thp_state(void) { -#ifndef JEMALLOC_HAVE_MADVISE_HUGE - if (opt_metadata_thp && opt_abort) { - malloc_write(": no MADV_HUGEPAGE support\n"); - abort(); + if (!have_madvise_huge) { + if (opt_metadata_thp && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; } - goto label_error; -#endif + static const char madvise_state[] = "always [madvise] never\n"; char buf[sizeof(madvise_state)]; diff --git a/test/unit/pages.c b/test/unit/pages.c index 67dbb4c..1a979e6 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -11,7 +11,7 @@ TEST_BEGIN(test_pages_huge) { assert_ptr_not_null(pages, "Unexpected pages_map() error"); hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), config_thp, + assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result"); assert_false(pages_nohuge(hugepage, HUGEPAGE), "Unexpected pages_nohuge() result"); -- cgit v0.12 From 8da69b69e6c4cd951832138780ac632e57987b7c Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Mon, 7 Aug 2017 21:51:09 +0300 Subject: Fix support for GNU/kFreeBSD The configure.ac seciton right now is the same for Linux and kFreeBSD, which results into an incorrect configuration of e.g. defining JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY instead of FreeBSD's JEMALLOC_SYSCTL_VM_OVERCOMMIT. 
GNU/kFreeBSD is really a glibc + FreeBSD kernel system, so it needs its own entry which has a mixture of configuration options from Linux and FreeBSD. --- configure.ac | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e1a7343..49b2df1 100644 --- a/configure.ac +++ b/configure.ac @@ -567,7 +567,7 @@ case "${host}" in default_retain="1" fi ;; - *-*-linux* | *-*-kfreebsd*) + *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" @@ -580,6 +580,15 @@ case "${host}" in default_retain="1" fi ;; + *-*-kfreebsd*) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) + abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -- cgit v0.12 From 82d1a3fb318fb086cd4207ca03dbdd5b0e3bbb26 Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Mon, 7 Aug 2017 21:56:21 +0300 Subject: Add support for m68k, nios2, SH3 architectures Add minimum alignment for three more architectures, as requested by Debian users or porters (see Debian bugs #807554, #816236, #863424). --- include/jemalloc/internal/jemalloc_internal_types.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 50f9d00..6b987d6 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -79,9 +79,15 @@ typedef int malloc_cpuid_t; # ifdef __hppa__ # define LG_QUANTUM 4 # endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif # ifdef __or1k__ # define LG_QUANTUM 3 # endif @@ -94,7 +100,8 @@ typedef int malloc_cpuid_t; # ifdef __s390__ # define LG_QUANTUM 4 # endif -# ifdef __SH4__ +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) # define LG_QUANTUM 4 # endif # ifdef __tile__ -- cgit v0.12 From b0825351d9eb49976164cff969a93877ac11f2c0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 Aug 2017 16:06:51 -0700 Subject: Add missing mallctl unit test for abort_conf. The abort_conf option was missed from test/unit/mallctl. --- test/unit/mallctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d9fdd05..0b14e78 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, abort_conf, always); TEST_MALLCTL_OPT(bool, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); -- cgit v0.12 From f3170baa30654b2f62547fa1ac80707d396e1245 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 10 Aug 2017 14:27:58 -0700 Subject: Pull out caching for a bin into its own file. This is the first step towards breaking up the tcache and arena (since they interact primarily at the bin level). It should also make a future arena caching implementation more straightforward. 
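For context, the bin-level interface being factored out can be pictured with this simplified sketch (the struct name and ASCII layout are illustrative only, not the exact header; the field names follow the new cache_bin.h introduced below):

    /*
     * avail points just above the cached items; avail[-ncached .. -1] hold the
     * cached pointers, and the lowest slot, avail[-ncached], is popped first,
     * so consecutive allocations walk toward higher addresses (which plays
     * well with adjacent-cacheline prefetch):
     *
     *   avail[-ncached_max]     avail[-ncached]              avail[-1]   avail
     *        |--- unused ---|   |---------- cached items ----------|       ^
     *                           ^ popped first          pushed last        |
     *                                                just above the items -+
     */
    typedef int32_t cache_bin_sz_t;   /* signed so low_water can be -1 */
    typedef struct {
        cache_bin_sz_t low_water;     /* min ncached since the last GC pass */
        cache_bin_sz_t ncached;       /* number of cached objects */
        void **avail;                 /* just above the available space */
    } cache_bin_sketch_t;

The tcache fills, flushes, and pops through this small struct, and (after the follow-up change further below that adds cache_bin_array_descriptor_t) the arena only ever walks arrays of these bins for stats, which is the tcache/arena decoupling this commit message is aiming for.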
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/cache_bin.h | 82 +++++++++++++++++++++ .../internal/jemalloc_internal_inlines_a.h | 8 +- include/jemalloc/internal/stats.h | 1 - include/jemalloc/internal/stats_tsd.h | 12 --- include/jemalloc/internal/tcache_externs.h | 8 +- include/jemalloc/internal/tcache_inlines.h | 85 ++++++++-------------- include/jemalloc/internal/tcache_structs.h | 40 ++-------- include/jemalloc/internal/tcache_types.h | 5 -- src/arena.c | 6 +- src/tcache.c | 38 +++++----- 11 files changed, 148 insertions(+), 139 deletions(-) create mode 100644 include/jemalloc/internal/cache_bin.h delete mode 100644 include/jemalloc/internal/stats_tsd.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index af16d15..4e546c3 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -50,7 +50,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h new file mode 100644 index 0000000..37025b5 --- /dev/null +++ b/include/jemalloc/internal/cache_bin.h @@ -0,0 +1,82 @@ +#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H +#define JEMALLOC_INTERNAL_CACHE_BIN_H + +/* + * The count of the number of cached allocations in a bin. We make this signed + * so that negative numbers can encode "invalid" states (e.g. a low water mark + * for a bin that has never been filled). + */ +typedef int32_t cache_bin_sz_t; + +typedef struct cache_bin_stats_s cache_bin_stats_t; +struct cache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +typedef struct cache_bin_info_s cache_bin_info_t; +struct cache_bin_info_s { + /* Upper limit on ncached. */ + cache_bin_sz_t ncached_max; +}; + +typedef struct cache_bin_s cache_bin_t; +struct cache_bin_s { + /* Min # cached since last GC. */ + cache_bin_sz_t low_water; + /* # of cached objects. */ + cache_bin_sz_t ncached; + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ + cache_bin_stats_t tstats; + /* + * Stack of available objects. + * + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; +}; + +JEMALLOC_ALWAYS_INLINE void * +cache_alloc_easy(cache_bin_t *bin, bool *success) { + void *ret; + + if (unlikely(bin->ncached == 0)) { + bin->low_water = -1; + *success = false; + return NULL; + } + /* + * success (instead of ret) should be checked upon the return of this + * function. 
We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + *success = true; + ret = *(bin->avail - bin->ncached); + bin->ncached--; + + if (unlikely(bin->ncached < bin->low_water)) { + bin->low_water = bin->ncached; + } + + return ret; + +} + +#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24ea416..5ec35db 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -106,16 +106,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_small_bin_get(tcache_t *tcache, szind_t binind) { assert(binind < NBINS); - return &tcache->tbins_small[binind]; + return &tcache->bins_small[binind]; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_large_bin_get(tcache_t *tcache, szind_t binind) { assert(binind >= NBINS &&binind < nhbins); - return &tcache->tbins_large[binind - NBINS]; + return &tcache->bins_large[binind - NBINS]; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 1198779..ab872e5 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ diff --git a/include/jemalloc/internal/stats_tsd.h b/include/jemalloc/internal/stats_tsd.h deleted file mode 100644 index d0c3bbe..0000000 --- a/include/jemalloc/internal/stats_tsd.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_STATS_TSD_H -#define JEMALLOC_INTERNAL_STATS_TSD_H - -typedef struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -} tcache_bin_stats_t; - -#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index db3e9c7..790367b 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -6,7 +6,7 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern tcache_bin_info_t *tcache_bin_info; +extern cache_bin_info_t *tcache_bin_info; /* * Number of tcache bins. 
There are NBINS small-object bins, plus 0 or more @@ -30,10 +30,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + cache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index c55bcd2..d1632d8 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -38,43 +38,16 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { - void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - *tcache_success = false; - return NULL; - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). - */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { - tbin->low_water = tbin->ncached; - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < NBINS); - tbin = tcache_small_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_small_bin_get(tcache, binind); + ret = cache_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -84,7 +57,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); + bin, binind, &tcache_hard_success); if (tcache_hard_success == false) { return NULL; } @@ -118,7 +91,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; assert(binind >= NBINS &&binind < nhbins); - tbin = tcache_large_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_large_bin_get(tcache, binind); + ret = cache_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { 
/* @@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -190,8 +163,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); @@ -199,15 +172,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); } - tbin = tcache_small_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + bin = tcache_small_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -215,8 +188,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); @@ -225,15 +198,15 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, large_dalloc_junk(ptr, sz_index2size(binind)); } - tbin = tcache_large_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + bin = tcache_large_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_large(tsd, bin, binind, + (bin_info->ncached_max >> 1), tcache); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 7eb516f..ad0fe66 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -3,54 +3,26 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" +#include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - low_water_t low_water; /* Min # cached since last GC. 
*/ - uint32_t ncached; /* # of cached objects. */ - /* - * ncached and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. - */ - tcache_bin_stats_t tstats; - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. - */ - void **avail; /* Stack of available objects. */ -}; - struct tcache_s { /* Data accessed frequently first: prof, ticker and small bins. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ ticker_t gc_ticker; /* Drives incremental GC. */ /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. */ - tcache_bin_t tbins_small[NBINS]; + cache_bin_t bins_small[NBINS]; /* Data accessed less often below. */ ql_elm(tcache_t) link; /* Used for aggregating stats. */ arena_t *arena; /* Associated arena. */ szind_t next_gc_bin; /* Next bin to GC. */ /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; - tcache_bin_t tbins_large[NSIZES-NBINS]; + cache_bin_t bins_large[NSIZES-NBINS]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 1155d62..e49bc9d 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -3,14 +3,9 @@ #include "jemalloc/internal/size_classes.h" -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* ncached is cast to this type for comparison. 
*/ -typedef int32_t low_water_t; - /* * tcache pointers close to NULL are used to encode state information that is * used for two purposes: preventing thread caching on a per thread basis and diff --git a/src/arena.c b/src/arena.c index 632fce5..60b482e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -307,12 +307,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(tcache, &arena->tcache_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } @@ -1420,7 +1420,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; diff --git a/src/tcache.c b/src/tcache.c index 936ef31..7d32d4d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -tcache_bin_info_t *tcache_bin_info; +cache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ unsigned nhbins; @@ -40,7 +40,7 @@ void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin; + cache_bin_t *tbin; if (binind < NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { @@ -58,7 +58,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
*/ - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if ((tbin_info->ncached_max >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; @@ -86,7 +86,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { + cache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; assert(tcache->arena != NULL); @@ -95,18 +95,18 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = tcache_alloc_easy(tbin, tcache_success); + ret = cache_alloc_easy(tbin, tcache_success); return ret; } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; assert(binind < NBINS); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -180,18 +180,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { bool merged_stats = false; assert(binind < nhbins); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -278,7 +278,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -354,8 +354,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); - memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS)); unsigned i = 0; for (; i < NBINS; i++) { tcache->lg_fill_div[i] = 1; @@ -450,7 +450,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); for (unsigned i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { @@ -458,7 +458,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } for (unsigned i = NBINS; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -525,7 +525,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); @@ -533,7 +533,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_large_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; @@ -657,8 +657,8 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(tcache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } -- cgit v0.12 From 9c0549007dcb64f4ff35d37390a9a6a8d3cea880 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 11 Aug 2017 17:34:21 -0700 Subject: Make arena stats collection go through cache bins. This eliminates the need for the arena stats code to "know" about tcaches; all that it needs is a cache_bin_array_descriptor_t to tell it where to find cache_bins whose stats it should aggregate. --- include/jemalloc/internal/arena_structs_b.h | 11 ++++---- include/jemalloc/internal/cache_bin.h | 34 ++++++++++++++++++++++++- include/jemalloc/internal/tcache_structs.h | 39 +++++++++++++++++++++++------ src/arena.c | 8 +++--- src/tcache.c | 9 +++++++ 5 files changed, 84 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index d1fffec..c4e4310 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -162,14 +162,15 @@ struct arena_s { arena_stats_t stats; /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ prof_accum_t prof_accum; diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 37025b5..85d9de0 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -1,6 +1,19 @@ #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H #define JEMALLOC_INTERNAL_CACHE_BIN_H +#include "jemalloc/internal/ql.h" + +/* + * The cache_bins are the mechanism that the tcache and the arena use to + * communicate. The tcache fills from and flushes to the arena by passing a + * cache_bin_t to fill/flush. When the arena needs to pull stats from the + * tcaches associated with it, it does so by iterating over its + * cache_bin_array_descriptor_t objects and reading out per-bin stats it + * contains. This makes it so that the arena need not know about the existence + * of the tcache at all. 
+ */ + + /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark @@ -51,6 +64,26 @@ struct cache_bin_s { void **avail; }; +typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; +struct cache_bin_array_descriptor_s { + /* + * The arena keeps a list of the cache bins associated with it, for + * stats collection. + */ + ql_elm(cache_bin_array_descriptor_t) link; + /* Pointers to the tcache bins. */ + cache_bin_t *bins_small; + cache_bin_t *bins_large; +}; + +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} + JEMALLOC_ALWAYS_INLINE void * cache_alloc_easy(cache_bin_t *bin, bool *success) { void *ret; @@ -76,7 +109,6 @@ cache_alloc_easy(cache_bin_t *bin, bool *success) { } return ret; - } #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index ad0fe66..07b7387 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,21 +7,46 @@ #include "jemalloc/internal/ticker.h" struct tcache_s { - /* Data accessed frequently first: prof, ticker and small bins. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ + /* + * To minimize our cache-footprint, we put the frequently accessed data + * together at the start of this struct. + */ + + /* Cleared after arena_prof_accum(). */ + uint64_t prof_accumbytes; + /* Drives incremental GC. */ + ticker_t gc_ticker; /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of * tbins is initialized to point to the proper offset within this array. */ cache_bin_t bins_small[NBINS]; - /* Data accessed less often below. */ - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - arena_t *arena; /* Associated arena. */ - szind_t next_gc_bin; /* Next bin to GC. */ + + /* + * This data is less hot; we can be a little less careful with our + * footprint here. + */ + /* Lets us track all the tcaches in an arena. */ + ql_elm(tcache_t) link; + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* Next bin to GC. */ + szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; + /* + * We put the cache bins for large size classes at the end of the + * struct, since some of them might not get used. This might end up + * letting us avoid touching an extra page if we don't have to. + */ cache_bin_t bins_large[NSIZES-NBINS]; }; diff --git a/src/arena.c b/src/arena.c index 60b482e..19aafaf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -303,16 +303,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. 
*/ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - tcache_t *tcache; - ql_foreach(tcache, &arena->tcache_ql, link) { + cache_bin_array_descriptor_t *descriptor; + ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } diff --git a/src/tcache.c b/src/tcache.c index 7d32d4d..e22f806 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -291,8 +291,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, tcache->bins_small, + tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -316,6 +323,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } -- cgit v0.12 From ea91dfa58e11373748f747041c3041f72c9a7658 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Aug 2017 13:32:28 -0700 Subject: Document the ialloc function abbreviations. In the jemalloc_internal_inlines files, we have a lot of somewhat terse function names. This commit adds some documentation to aid in translation. --- include/jemalloc/internal/cache_bin.h | 2 +- .../jemalloc/internal/jemalloc_internal_inlines_c.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 85d9de0..9b87439 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -17,7 +17,7 @@ /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark - * for a bin that has never been filled). + * of -1 for a cache that has been depleted). 
*/ typedef int32_t cache_bin_sz_t; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 7ffce6f..c54fc99 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -5,6 +5,24 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" +/* + * Translating the names of the 'i' functions: + * Abbreviations used in the first part of the function name (before + * alloc/dalloc) describe what that function accomplishes: + * a: arena (query) + * s: size (query, or sized deallocation) + * e: extent (query) + * p: aligned (allocates) + * vs: size (query, without knowing that the pointer is into the heap) + * r: rallocx implementation + * x: xallocx implementation + * Abbreviations used in the second part of the function name (after + * alloc/dalloc) describe the arguments it takes + * z: whether to return zeroed memory + * t: accepts a tcache_t * parameter + * m: accepts an arena_t * parameter + */ + JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); -- cgit v0.12 From 47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Aug 2017 14:29:28 -0700 Subject: Change opt.metadata_thp to [disabled,auto,always]. To avoid the high RSS caused by THP + low usage arena (i.e. THP becomes a significant percentage), added a new "auto" option which will only start using THP after a base allocator used up the first THP region. Starting from the second hugepage (in a single arena), "auto" behaves the same as "always", i.e. madvise hugepage right away. --- doc/jemalloc.xml.in | 12 +++++---- include/jemalloc/internal/base_externs.h | 3 ++- include/jemalloc/internal/base_inlines.h | 4 +++ include/jemalloc/internal/base_types.h | 17 +++++++++++- src/base.c | 46 +++++++++++++++++++++++--------- src/ctl.c | 3 ++- src/jemalloc.c | 18 ++++++++++++- src/pages.c | 2 +- src/stats.c | 2 +- test/unit/mallctl.c | 2 +- 10 files changed, 84 insertions(+), 25 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f1712f0..0c95604 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -919,13 +919,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.metadata_thp - (bool) + (const char *) r- - If true, allow jemalloc to use transparent huge page - (THP) for internal metadata (see stats.metadata for details). This - option is disabled by default. + Controls whether to allow jemalloc to use transparent + huge page (THP) for internal metadata (see stats.metadata). always + allows such usage. auto uses no THP initially, but may + begin to do so when metadata usage reaches certain level. The default + is disabled. 
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index a5cb8a8..6cd1187 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,7 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H -extern bool opt_metadata_thp; +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index 931560b..aec0e2e 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -6,4 +6,8 @@ base_ind_get(const base_t *base) { return base->ind; } +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} #endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index 6e71033..97e38a9 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -4,6 +4,21 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; -#define METADATA_THP_DEFAULT false +#define METADATA_THP_DEFAULT metadata_thp_disabled + +typedef enum { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. + */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +} metadata_thp_mode_t; #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/src/base.c b/src/base.c index 9925978..9cb02b6 100644 --- a/src/base.c +++ b/src/base.c @@ -12,7 +12,13 @@ static base_t *b0; -bool opt_metadata_thp = METADATA_THP_DEFAULT; +metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; + +const char *metadata_thp_mode_names[] = { + "disabled", + "auto", + "always" +}; /******************************************************************************/ @@ -24,7 +30,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* We use hugepage sizes regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE; + size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { @@ -36,12 +42,6 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) post_reentrancy(tsd); } - if (addr != NULL && opt_metadata_thp && thp_state_madvise) { - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (size & HUGEPAGE_MASK) == 0); - pages_huge(addr, size); - } - return addr; } @@ -101,7 +101,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, post_reentrancy(tsd); } label_done: - if (opt_metadata_thp && thp_state_madvise) { + if (metadata_thp_enabled() && thp_state_madvise) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (size & HUGEPAGE_MASK) == 0); @@ -181,8 +181,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. */ static base_block_t * -base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, - pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, + unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -208,6 +208,26 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } + + if (metadata_thp_enabled() && thp_state_madvise) { + void *addr = (void *)block; + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (block_size & HUGEPAGE_MASK) == 0); + /* base == NULL indicates this is a new base. */ + if (base != NULL || opt_metadata_thp == metadata_thp_always) { + /* Use hugepage for the new block. */ + pages_huge(addr, block_size); + } + if (base != NULL && opt_metadata_thp == metadata_thp_auto) { + /* Make the first block THP lazily. */ + base_block_t *first_block = base->blocks; + if (first_block->next == NULL) { + assert((first_block->size & HUGEPAGE_MASK) == 0); + pages_huge(first_block, first_block->size); + } + } + } + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; @@ -231,7 +251,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. */ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -259,7 +279,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; diff --git a/src/ctl.c b/src/ctl.c index c299103..ace10b0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1570,7 +1570,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) -CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool) +CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], + const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index cbae259..3c0ea7d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1055,7 +1055,23 @@ malloc_conf_init(void) { if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } - CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp") + if (strncmp("metadata_thp", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < metadata_thp_mode_limit; i++) { + if (strncmp(metadata_thp_mode_names[i], + v, vlen) == 0) { + opt_metadata_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_BOOL(opt_retain, "retain") if 
(strncmp("dss", k, klen) == 0) { int i; diff --git a/src/pages.c b/src/pages.c index 70f1fd3..4ca3107 100644 --- a/src/pages.c +++ b/src/pages.c @@ -418,7 +418,7 @@ os_overcommits_proc(void) { static void init_thp_state(void) { if (!have_madvise_huge) { - if (opt_metadata_thp && opt_abort) { + if (metadata_thp_enabled() && opt_abort) { malloc_write(": no MADV_HUGEPAGE support\n"); abort(); } diff --git a/src/stats.c b/src/stats.c index 746cc42..e1a3f8c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,11 +802,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(metadata_thp, ",") OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") + OPT_WRITE_CHAR_P(metadata_thp, ",") OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 0b14e78..5612cce 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -158,7 +158,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); - TEST_MALLCTL_OPT(bool, metadata_thp, always); + TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); -- cgit v0.12 From e55c3ca26758bcb7f6f1621fd690caa245f16942 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 25 Aug 2017 13:24:49 -0700 Subject: Add stats for metadata_thp. Report number of THPs used in arena and aggregated stats. --- doc/jemalloc.xml.in | 26 ++++++++++++++++ include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 2 ++ include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/stats.h | 1 + src/arena.c | 5 ++-- src/base.c | 51 +++++++++++++++++++++++++++----- src/ctl.c | 12 ++++++++ src/stats.c | 22 +++++++++++--- test/unit/base.c | 24 +++++++++++---- 10 files changed, 125 insertions(+), 21 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0c95604..f7fbe30 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2207,6 +2207,20 @@ struct extent_hooks_s { considered. + + + stats.metadata_thp + (size_t) + r- + [] + + Number of transparent huge pages (THP) used for + metadata. See stats.metadata and + opt.metadata_thp) for + details. + + stats.resident @@ -2523,6 +2537,18 @@ struct extent_hooks_s { profiles. + + + stats.arenas.<i>.metadata_thp + (size_t) + r- + [] + + Number of transparent huge pages (THP) used for + metadata. See opt.metadata_thp + for details. 
+ + stats.arenas.<i>.resident diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 6cd1187..7b705c9 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -13,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); + size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 18e227b..b542169 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -50,6 +50,8 @@ struct base_s { size_t allocated; size_t resident; size_t mapped; + /* Number of THP regions touched. */ + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a91c4cf..a36feaf 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -48,6 +48,7 @@ typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_thp; size_t resident; size_t mapped; size_t retained; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index ab872e5..f19df37 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -142,6 +142,7 @@ typedef struct arena_stats_s { atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. 
*/ diff --git a/src/arena.c b/src/arena.c index 19aafaf..18ed5aa 100644 --- a/src/arena.c +++ b/src/arena.c @@ -234,9 +234,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped; + size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); + &base_mapped, &metadata_thp); arena_stats_lock(tsdn, &arena->stats); @@ -267,6 +267,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + diff --git a/src/base.c b/src/base.c index 9cb02b6..609a445 100644 --- a/src/base.c +++ b/src/base.c @@ -22,6 +22,11 @@ const char *metadata_thp_mode_names[] = { /******************************************************************************/ +static inline bool +metadata_thp_madvise(void) { + return (metadata_thp_enabled() && thp_state_madvise); +} + static void * base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; @@ -101,7 +106,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, post_reentrancy(tsd); } label_done: - if (metadata_thp_enabled() && thp_state_madvise) { + if (metadata_thp_madvise()) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (size & HUGEPAGE_MASK) == 0); @@ -120,6 +125,13 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static bool +base_is_single_block(base_t *base) { + assert(base->blocks != NULL && + (base->blocks->size & HUGEPAGE_MASK) == 0); + return (base->blocks->next == NULL); +} + static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, size_t alignment) { @@ -155,12 +167,20 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. + * crossed by the new allocation. Adjust n_thp similarly when + * metadata_thp is enabled. */ base->resident += PAGE_CEILING((uintptr_t)addr + size) - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + if (metadata_thp_madvise() && (!base_is_single_block(base) || + opt_metadata_thp == metadata_thp_always)) { + base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) + - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> + LG_HUGEPAGE; + assert(base->mapped >= base->n_thp << LG_HUGEPAGE); + } } } @@ -209,7 +229,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, return NULL; } - if (metadata_thp_enabled() && thp_state_madvise) { + if (metadata_thp_madvise()) { void *addr = (void *)block; assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (block_size & HUGEPAGE_MASK) == 0); @@ -218,12 +238,15 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, /* Use hugepage for the new block. 
*/ pages_huge(addr, block_size); } - if (base != NULL && opt_metadata_thp == metadata_thp_auto) { + if (base != NULL && base_is_single_block(base) && + opt_metadata_thp == metadata_thp_auto) { /* Make the first block THP lazily. */ base_block_t *first_block = base->blocks; - if (first_block->next == NULL) { - assert((first_block->size & HUGEPAGE_MASK) == 0); - pages_huge(first_block, first_block->size); + assert((first_block->size & HUGEPAGE_MASK) == 0); + pages_huge(first_block, first_block->size); + if (config_stats) { + assert(base->n_thp == 0); + base->n_thp += first_block->size >> LG_HUGEPAGE; } } } @@ -264,8 +287,15 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; + if (metadata_thp_madvise()) { + assert(!base_is_single_block(base)); + assert(base->n_thp > 0); + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> + LG_HUGEPAGE; + } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } return &block->extent; } @@ -307,8 +337,12 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; + base->n_thp = (opt_metadata_thp == metadata_thp_always) && + metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, base_size); @@ -403,7 +437,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) { void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) { + size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -412,6 +446,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, *allocated = base->allocated; *resident = base->resident; *mapped = base->mapped; + *n_thp = base->n_thp; malloc_mutex_unlock(tsdn, &base->mtx); } diff --git a/src/ctl.c b/src/ctl.c index ace10b0..a2f3837 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -183,6 +183,7 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) @@ -192,6 +193,7 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) @@ -476,6 +478,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, @@ -514,6 +517,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), 
CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, @@ -775,6 +779,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.internal); accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); + accum_atomic_zu(&sdstats->astats.metadata_thp, + &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -940,6 +946,8 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_thp = atomic_load_zu( + &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = atomic_load_zu( @@ -2464,6 +2472,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) @@ -2519,6 +2528,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) diff --git a/src/stats.c b/src/stats.c index e1a3f8c..cbeb923 100644 --- a/src/stats.c +++ b/src/stats.c @@ -401,7 +401,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident; + size_t base, internal, resident, metadata_thp; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -613,6 +613,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "internal: %12zu\n", internal); } + CTL_M2_GET("stats.arenas.0.metadata_thp", i, &metadata_thp, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata_thp: %12zu\n", metadata_thp); + } + CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -1007,13 +1016,15 @@ static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex) { - size_t allocated, active, metadata, resident, mapped, retained; + size_t allocated, active, metadata, metadata_thp, resident, mapped, + retained; size_t num_background_threads; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", 
&allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); @@ -1047,6 +1058,8 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"metadata\": %zu,\n", metadata); malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); + malloc_cprintf(write_cb, cbopaque, "\t\t\t\"resident\": %zu,\n", resident); malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mapped\": %zu,\n", mapped); @@ -1082,9 +1095,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t}%s\n", (merged || unmerged || destroyed) ? "," : ""); } else { malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," + "Allocated: %zu, active: %zu, metadata: %zu (n_thp %zu)," " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); + allocated, active, metadata, metadata_thp, resident, mapped, + retained); if (have_background_thread && num_background_threads > 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/test/unit/base.c b/test/unit/base.c index 7fa24ac..6b792cf 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -28,22 +28,28 @@ static extent_hooks_t hooks_not_null = { TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, resident, mapped; + size_t allocated0, allocated1, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + base_stats_get(tsdn, base, &allocated0, &resident, &mapped, + &n_thp); assert_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); + if (opt_metadata_thp == metadata_thp_always) { + assert_zu_gt(n_thp, 0, + "Base should have 1 THP at least."); + } } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + base_stats_get(tsdn, base, &allocated1, &resident, &mapped, + &n_thp); assert_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -55,7 +61,7 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; base_t *base; - size_t allocated0, allocated1, resident, mapped; + size_t allocated0, allocated1, resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -71,16 +77,22 @@ TEST_BEGIN(test_base_hooks_null) { assert_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + base_stats_get(tsdn, base, &allocated0, &resident, &mapped, + &n_thp); assert_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); + if (opt_metadata_thp == metadata_thp_always) { + assert_zu_gt(n_thp, 0, + "Base should have 1 THP at least."); + } } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + base_stats_get(tsdn, base, &allocated1, &resident, &mapped, + &n_thp); assert_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } -- cgit v0.12 From 
a315688be0f38188f16fe89ee1657c7f596f8cbb Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Wed, 30 Aug 2017 16:17:04 -0700
Subject: Relax constraints on reentrancy for extent hooks.

If we guarantee no malloc activity in extent hooks, it's possible to make
customized hooks work on arena 0. Remove the non-a0 assertion to enable such
use cases.
---
 include/jemalloc/internal/jemalloc_internal_inlines_a.h |  1 +
 src/extent.c | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 5ec35db..c6a1f7e 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -151,6 +151,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) {
	assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
	bool fast = tsd_fast(tsd);
+	assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
	++*tsd_reentrancy_levelp_get(tsd);
	if (fast) {
		/* Prepare slow path for reentrancy. */
diff --git a/src/extent.c b/src/extent.c
index f464de4..3f1c76f 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -1028,7 +1028,18 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size,
 static void
 extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) {
	tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
-	pre_reentrancy(tsd, arena);
+	if (arena == arena_get(tsd_tsdn(tsd), 0, false)) {
+		/*
+		 * The only legitimate case of customized extent hooks for a0 is
+		 * hooks with no allocation activities. One such example is to
+		 * place metadata on pre-allocated resources such as huge pages.
+		 * In that case, rely on reentrancy_level checks to catch
+		 * infinite recursions.
+		 */
+		pre_reentrancy(tsd, NULL);
+	} else {
+		pre_reentrancy(tsd, arena);
+	}
 }
 static void
-- cgit v0.12

From cf4738455d990918914cdc8608936433ef897a6e Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Wed, 6 Sep 2017 10:15:33 -0700
Subject: Fix a link for dirty_decay_ms in manual.

---
 doc/jemalloc.xml.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index f7fbe30..dda9a73 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1036,7 +1036,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
        The default decay time is 10 seconds. See arenas.dirty_decay_ms
        and arena.<i>.muzzy_decay_ms
+        linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms
        for related dynamic control options. See opt.muzzy_decay_ms
        for a description of muzzy pages.
-- cgit v0.12

From 886053b966f4108e4b9ee5e29a0a708e91bc72f8 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Mon, 11 Sep 2017 13:32:58 -0700
Subject: Fix huge page test in test/unit/pages.

Huge pages could be disabled even if the kernel header has MADV_HUGEPAGE
defined. Guard the huge page test with runtime detection.
---
 test/unit/pages.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/test/unit/pages.c b/test/unit/pages.c
index 1a979e6..49ad009 100644
--- a/test/unit/pages.c
+++ b/test/unit/pages.c
@@ -10,11 +10,13 @@ TEST_BEGIN(test_pages_huge) {
	pages = pages_map(NULL, alloc_size, PAGE, &commit);
	assert_ptr_not_null(pages, "Unexpected pages_map() error");
-	hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE));
-	assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge,
-	    "Unexpected pages_huge() result");
-	assert_false(pages_nohuge(hugepage, HUGEPAGE),
-	    "Unexpected pages_nohuge() result");
+	if (thp_state_madvise) {
+		hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE));
+		assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge,
+		    "Unexpected pages_huge() result");
+		assert_false(pages_nohuge(hugepage, HUGEPAGE),
+		    "Unexpected pages_nohuge() result");
+	}
	pages_unmap(pages, alloc_size);
 }
-- cgit v0.12

From 9b20a4bf70efd675604985ca37335f8b0136a289 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Tue, 12 Sep 2017 11:38:13 -0700
Subject: Clear cache bin ql postfork.

This fixes a regression in 9c05490, which introduced the new cache bin ql. The
list needs to be cleaned up after fork, same as tcache_ql.
---
 src/arena.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/arena.c b/src/arena.c
index 18ed5aa..43ba601 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1936,6 +1936,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
	}
	ql_new(&arena->tcache_ql);
+	ql_new(&arena->cache_bin_array_descriptor_ql);
	if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql",
	    WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) {
		goto label_error;
@@ -2155,10 +2156,16 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) {
	}
	if (config_stats) {
		ql_new(&arena->tcache_ql);
+		ql_new(&arena->cache_bin_array_descriptor_ql);
		tcache_t *tcache = tcache_get(tsdn_tsd(tsdn));
		if (tcache != NULL && tcache->arena == arena) {
			ql_elm_new(tcache, link);
			ql_tail_insert(&arena->tcache_ql, tcache, link);
+			cache_bin_array_descriptor_init(
+			    &tcache->cache_bin_array_descriptor,
+			    tcache->bins_small, tcache->bins_large);
+			ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
+			    &tcache->cache_bin_array_descriptor, link);
		}
	}
-- cgit v0.12

From 9e39425bf1653e4bebb7b377dd716f98cab069ff Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Tue, 19 Sep 2017 09:27:33 -0700
Subject: Force Ubuntu "precise" for Travis CI builds.

We've been seeing strange errors in jemalloc_cpp.cpp since Travis upgraded from
precise to trusty as their default CI environment (seemingly stemming from the
new clang version finding the headers for an old version of libstdc++). In the
long run we'll have to deal with this "for real", but at that point we may have
a better C++ story in general, making it a moot point.
---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 418fc6f..4cc116e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,5 @@
 language: generic
+dist: precise
 matrix:
   include:
-- cgit v0.12

From d60f3bac1237666922c16e7a1b281a2c7721863c Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Mon, 18 Sep 2017 14:22:44 -0700
Subject: Add missing field in initializer for rtree cache.

Fix a warning by -Wmissing-field-initializers.
--- include/jemalloc/internal/rtree_tsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 3cdc862..93a7517 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -26,7 +26,7 @@ * Zero initializer required for tsd initialization only. Proper initialization * done via rtree_ctx_data_init(). */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; -- cgit v0.12 From eaa58a50267df6f5f2a5da38d654fd98fc4a1136 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Sep 2017 14:36:43 -0700 Subject: Put static keyword first. Fix a warning by -Wold-style-declaration. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c0ea7d..4c31a2d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1730,7 +1730,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } /* A size_t with its high-half bits all set to 1. */ - const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); + static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; -- cgit v0.12 From 96f1468221b9e846dd70eb7e65634a41e6804c20 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 14:50:55 -0400 Subject: whitespace --- configure.ac | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 49b2df1..4373c21 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ dnl ============================================================================ dnl Custom macro definitions. dnl JE_CONCAT_VVV(r, a, b) -dnl +dnl dnl Set $r to the concatenation of $a and $b, with a space separating them iff dnl both $a and $b are non-emty. AC_DEFUN([JE_CONCAT_VVV], @@ -20,7 +20,7 @@ fi ) dnl JE_APPEND_VS(a, b) -dnl +dnl dnl Set $a to the concatenation of $a and b, with a space separating them iff dnl both $a and b are non-empty. AC_DEFUN([JE_APPEND_VS], @@ -31,7 +31,7 @@ AC_DEFUN([JE_APPEND_VS], CONFIGURE_CFLAGS= SPECIFIED_CFLAGS="${CFLAGS}" dnl JE_CFLAGS_ADD(cflag) -dnl +dnl dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests. This macro dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS. @@ -57,7 +57,7 @@ JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) dnl JE_CFLAGS_SAVE() dnl JE_CFLAGS_RESTORE() -dnl +dnl dnl Save/restore CFLAGS. Nesting is not supported. AC_DEFUN([JE_CFLAGS_SAVE], SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" @@ -91,7 +91,7 @@ JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) -dnl +dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors dnl cause failure. AC_DEFUN([JE_COMPILABLE], @@ -517,7 +517,7 @@ AC_PROG_AWK dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. -dnl +dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
@@ -1412,7 +1412,7 @@ AC_ARG_WITH([lg_page_sizes], dnl ============================================================================ dnl jemalloc configuration. -dnl +dnl AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], -- cgit v0.12 From 24766ccd5bcc379b7d518b3ec2480d2d146873ac Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:04:17 -0400 Subject: Allow toolchain to determine nm --- configure.ac | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4373c21..c98f7b6 100644 --- a/configure.ac +++ b/configure.ac @@ -512,6 +512,11 @@ AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) AC_PROG_AR +AN_MAKEVAR([NM], [AC_PROG_NM]) +AN_PROGRAM([nm], [AC_PROG_NM]) +AC_DEFUN([AC_PROG_NM], [AC_CHECK_TOOL(NM, nm, :)]) +AC_PROG_NM + AC_PROG_AWK dnl Platform-specific settings. abi and RPATH can probably be determined @@ -523,7 +528,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_retain="0" maps_coalesce="1" -DUMP_SYMS="nm -a" +DUMP_SYMS="${NM} -a" SYM_PREFIX="" case "${host}" in *-*-darwin* | *-*-ios*) -- cgit v0.12 From a545f1804a19f48244ee5e328e32e2d036ffea0d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:14:48 -0400 Subject: dumpbin doesn't exist in mingw --- configure.ac | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index c98f7b6..ab2f41a 100644 --- a/configure.ac +++ b/configure.ac @@ -645,7 +645,13 @@ case "${host}" in DSO_LDFLAGS="-shared" link_whole_archive="1" fi - DUMP_SYMS="dumpbin /SYMBOLS" + case "${host}" in + *-*-cygwin*) + DUMP_SYMS="dumpbin /SYMBOLS" + ;; + *) + ;; + esac a="lib" libprefix="" SOREV="${so}" -- cgit v0.12 From 56f0e57844bc1d2c806738860bf93e2ccee135b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Sep 2017 14:34:13 -0700 Subject: Add "falls through" comment explicitly. Fix warnings by -Wimplicit-fallthrough. 
--- include/jemalloc/internal/hash.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 188296c..dcfc992 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -260,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */ + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */ + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */ + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */ + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */ + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */ case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + /* falls through */ + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */ + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */ + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */ + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */ + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */ + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */ + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */ case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } -- cgit v0.12 From 3959a9fe1973a7d7ddbbd99056c22e9b684a3275 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Sep 2017 15:35:29 -0700 Subject: Avoid left shift by negative values. Fix warnings on -Wshift-negative-value. --- include/jemalloc/internal/sz.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 7f640d5..9794628 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -61,7 +61,7 @@ sz_psz2ind(size_t psz) { pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -142,7 +142,7 @@ sz_size2index_compute(size_t size) { szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); -- cgit v0.12 From 0720192a323f5dd2dd27828c6ab3061f8f039416 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 26 Sep 2017 13:45:21 -0700 Subject: Add runtime detection of lazy purging support. 
It's possible to build with lazy purge enabled but deploy to systems without
such support. In this case, rely on the boot-time detection instead of
repeatedly making unnecessary madvise calls (which all return EINVAL).
---
 src/pages.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/src/pages.c b/src/pages.c
index 4ca3107..8469188 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -27,6 +27,9 @@ static bool os_overcommits;
 bool thp_state_madvise;
+/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
+static bool pages_can_purge_lazy_runtime = true;
+
 /******************************************************************************/
 /*
  * Function prototypes for static functions that are referenced prior to
@@ -254,6 +257,13 @@ pages_purge_lazy(void *addr, size_t size) {
	if (!pages_can_purge_lazy) {
		return true;
	}
+	if (!pages_can_purge_lazy_runtime) {
+		/*
+		 * Built with lazy purge enabled, but detected it was not
+		 * supported on the current system.
+		 */
+		return true;
+	}
 #ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
@@ -491,5 +501,19 @@ pages_boot(void) {
	init_thp_state();
+	/* Detect lazy purge runtime support. */
+	if (pages_can_purge_lazy) {
+		bool committed = false;
+		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
+		if (madv_free_page == NULL) {
+			return true;
+		}
+		assert(pages_can_purge_lazy_runtime);
+		if (pages_purge_lazy(madv_free_page, PAGE)) {
+			pages_can_purge_lazy_runtime = false;
+		}
+		os_pages_unmap(madv_free_page, PAGE);
+	}
+
	return false;
 }
-- cgit v0.12

From 7a8bc7172b17e219b3603e99c8da44efb283e652 Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Fri, 29 Sep 2017 13:54:08 -0700
Subject: ARM: Don't extend bit LG_VADDR to compute high address bits.

In userspace ARM on Linux, zeroing the high bits is the correct way to do this.
This doesn't fix the fact that we currently set LG_VADDR to 48 on ARM, when in
fact larger virtual address sizes are coming soon. We'll cross that bridge when
we come to it.
---
 include/jemalloc/internal/rtree.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index b5d4db3..4563db2 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -178,9 +178,21 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
 JEMALLOC_ALWAYS_INLINE extent_t *
 rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
+#	ifdef __aarch64__
+	/*
+	 * aarch64 doesn't sign extend the highest virtual address bit to set
+	 * the higher ones. Instead, the high bits gets zeroed.
+	 */
+	uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1;
+	/* Mask off the slab bit. */
+	uintptr_t low_bit_mask = ~(uintptr_t)1;
+	uintptr_t mask = high_bit_mask & low_bit_mask;
+	return (extent_t *)(bits & mask);
+#	else
	/* Restore sign-extended high bits, mask slab bit. */
	return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
	    RTREE_NHIB) & ~((uintptr_t)0x1));
+#	endif
 }

 JEMALLOC_ALWAYS_INLINE szind_t
-- cgit v0.12

From 8a7ee3014cea09e13e605bf47c11943df5a5eb2b Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Mon, 2 Oct 2017 17:48:03 -0700
Subject: Logging: capitalize log macro.

Dodge a name-conflict with the math.h logarithm function. D'oh.
--- include/jemalloc/internal/log.h | 33 ++++++-------- src/jemalloc.c | 96 ++++++++++++++++++++--------------------- test/unit/log.c | 2 +- 3 files changed, 61 insertions(+), 70 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 9f32fb4..6420858 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -14,30 +14,21 @@ #define JEMALLOC_LOG_BUFSIZE 4096 /* - * The log_vars malloc_conf option is a '|'-delimited list of log_var name - * segments to log. The log_var names are themselves hierarchical, with '.' as + * The log malloc_conf option is a '|'-delimited list of log_var name segments + * which should be logged. The names are themselves hierarchical, with '.' as * the delimiter (a "segment" is just a prefix in the log namespace). So, if * you have: * - * static log_var_t log_arena = LOG_VAR_INIT("arena"); // 1 - * static log_var_t log_arena_a = LOG_VAR_INIT("arena.a"); // 2 - * static log_var_t log_arena_b = LOG_VAR_INIT("arena.b"); // 3 - * static log_var_t log_arena_a_a = LOG_VAR_INIT("arena.a.a"); // 4 - * static_log_var_t log_extent_a = LOG_VAR_INIT("extent.a"); // 5 - * static_log_var_t log_extent_b = LOG_VAR_INIT("extent.b"); // 6 + * log("arena", "log msg for arena"); // 1 + * log("arena.a", "log msg for arena.a"); // 2 + * log("arena.b", "log msg for arena.b"); // 3 + * log("arena.a.a", "log msg for arena.a.a"); // 4 + * log("extent.a", "log msg for extent.a"); // 5 + * log("extent.b", "log msg for extent.b"); // 6 * - * And your malloc_conf option is "log_vars=arena.a|extent", then log_vars 2, 4, - * 5, and 6 will be enabled. You can enable logging from all log vars by - * writing "log_vars=.". - * - * You can then log by writing: - * log(log_var, "format string -- my int is %d", my_int); - * - * The namespaces currently in use: - * core.[malloc|free|posix_memalign|...].[entry|exit]: - * The entry/exit points of the functions publicly exposed by jemalloc. - * The "entry" variants try to log arguments to the functions, and the - * "exit" ones try to log return values. + * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * 6 will print at runtime. You can enable logging from all log vars by + * writing "log=.". * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging @@ -113,7 +104,7 @@ log_impl_varargs(const char *name, ...) { } /* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ -#define log(log_var_str, ...) \ +#define LOG(log_var_str, ...) \ do { \ static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ log_do_begin(log_var) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c31a2d..28e604b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1192,7 +1192,7 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } if (config_log) { - if (CONF_MATCH("log_vars")) { + if (CONF_MATCH("log")) { size_t cpylen = ( vlen <= sizeof(log_var_names) ? 
vlen : sizeof(log_var_names) - 1); @@ -1991,7 +1991,7 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.malloc.entry", "size: %zu", size); + LOG("core.malloc.entry", "size: %zu", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2007,7 +2007,7 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); - log("core.malloc.exit", "result: %p", ret); + LOG("core.malloc.exit", "result: %p", ret); return ret; } @@ -2019,7 +2019,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " + LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " "size: %zu", memptr, alignment, size); static_opts_init(&sopts); @@ -2039,7 +2039,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); - log("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, + LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); return ret; @@ -2054,7 +2054,7 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", + LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2076,7 +2076,7 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log("core.aligned_alloc.exit", "result: %p", ret); + LOG("core.aligned_alloc.exit", "result: %p", ret); return ret; } @@ -2089,7 +2089,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2107,7 +2107,7 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); - log("core.calloc.exit", "result: %p", ret); + LOG("core.calloc.exit", "result: %p", ret); return ret; } @@ -2262,7 +2262,7 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; - log("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -2277,7 +2277,7 @@ je_realloc(void *ptr, size_t size) { } ifree(tsd, ptr, tcache, true); - log("core.realloc.exit", "result: %p", NULL); + LOG("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2311,7 +2311,7 @@ je_realloc(void *ptr, size_t size) { } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ void *ret = je_malloc(size); - log("core.realloc.exit", "result: %p", ret); + LOG("core.realloc.exit", "result: %p", ret); return ret; } @@ -2334,13 +2334,13 @@ je_realloc(void *ptr, size_t size) { UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); - log("core.realloc.exit", "result: %p", ret); + LOG("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { - log("core.free.entry", "ptr: %p", ptr); + LOG("core.free.entry", "ptr: %p", ptr); UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { @@ -2371,7 +2371,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log("core.free.exit", ""); + LOG("core.free.exit", ""); } /* @@ -2391,7 +2391,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, + LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2412,7 +2412,7 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log("core.memalign.exit", "result: %p", ret); + LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2427,7 +2427,7 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.valloc.entry", "size: %zu\n", size); + LOG("core.valloc.entry", "size: %zu\n", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2447,7 +2447,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); - log("core.valloc.exit", "result: %p\n", ret); + LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2521,7 +2521,7 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.mallocx.entry", "size: %zu, flags: %d", size, flags); + LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2557,7 +2557,7 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); - log("core.mallocx.exit", "result: %p", ret); + LOG("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2638,7 +2638,7 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - log("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); @@ -2705,7 +2705,7 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.rallocx.exit", "result: %p", p); + LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2715,7 +2715,7 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.rallocx.exit", "result: %p", NULL); + LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2802,7 +2802,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - log("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " + LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); assert(ptr != NULL); @@ -2855,7 +2855,7 @@ label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.xallocx.exit", "result: %zu", usize); + LOG("core.xallocx.exit", "result: %zu", usize); return usize; } @@ -2865,7 +2865,7 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; - log("core.sallocx.entry", "ptr: 
%p, flags: %d", ptr, flags); + LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2882,13 +2882,13 @@ je_sallocx(const void *ptr, int flags) { check_entry_exit_locking(tsdn); - log("core.sallocx.exit", "result: %zu", usize); + LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { - log("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); + LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2928,7 +2928,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.dallocx.exit", ""); + LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2950,7 +2950,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - log("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); tsd_t *tsd = tsd_fetch(); @@ -2990,7 +2990,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.sdallocx.exit", ""); + LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3002,7 +3002,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { - log("core.nallocx.exit", "result: %zu", ZU(0)); + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -3011,12 +3011,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { - log("core.nallocx.exit", "result: %zu", ZU(0)); + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); - log("core.nallocx.exit", "result: %zu", usize); + LOG("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -3026,10 +3026,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; - log("core.mallctl.entry", "name: %s", name); + LOG("core.mallctl.entry", "name: %s", name); if (unlikely(malloc_init())) { - log("core.mallctl.exit", "result: %d", EAGAIN); + LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3038,7 +3038,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctl.exit", "result: %d", ret); + LOG("core.mallctl.exit", "result: %d", ret); return ret; } @@ -3046,10 +3046,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - log("core.mallctlnametomib.entry", "name: %s", name); + LOG("core.mallctlnametomib.entry", "name: %s", name); if (unlikely(malloc_init())) { - log("core.mallctlnametomib.exit", "result: %d", EAGAIN); + LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3058,7 +3058,7 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctlnametomib.exit", "result: %d", ret); + LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -3068,10 +3068,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; - log("core.mallctlbymib.entry", ""); + 
LOG("core.mallctlbymib.entry", ""); if (unlikely(malloc_init())) { - log("core.mallctlbymib.exit", "result: %d", EAGAIN); + LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3079,7 +3079,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctlbymib.exit", "result: %d", ret); + LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -3088,13 +3088,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; - log("core.malloc_stats_print.entry", ""); + LOG("core.malloc_stats_print.entry", ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); - log("core.malloc_stats_print.exit", ""); + LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3102,7 +3102,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; - log("core.malloc_usable_size.entry", "ptr: %p", ptr); + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); assert(malloc_initialized() || IS_INITIALIZER); @@ -3121,7 +3121,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); - log("core.malloc_usable_size.exit", "result: %zu", ret); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/test/unit/log.c b/test/unit/log.c index 3c1a208..a52bd73 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -176,7 +176,7 @@ TEST_END */ TEST_BEGIN(test_log_only_format_string) { if (false) { - log("log_str", "No arguments follow this format string."); + LOG("log_str", "No arguments follow this format string."); } } TEST_END -- cgit v0.12 From 7c6c99b8295829580c506067495a23c07436e266 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 26 Sep 2017 17:22:01 -0700 Subject: Use ph instead of rb tree for extents_avail_ There does not seem to be any overlap between usage of extent_avail and extent_heap, so we can use the same hook. The only remaining usage of rb trees is in the profiling code, which has some 'interesting' iteration constraints. 
Fixes #888 --- include/jemalloc/internal/extent_externs.h | 1 - include/jemalloc/internal/extent_structs.h | 28 +++++++++++++--------------- src/extent.c | 2 +- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 489a813..9da5d01 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -4,7 +4,6 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/rtree.h" extern rtree_t extents_rtree; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index d297950..641a632 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/size_classes.h" @@ -120,20 +119,19 @@ struct extent_s { size_t e_bsize; }; - union { - /* - * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - /* Red-black tree linkage, used by arena's extent_avail. */ - rb_node(extent_t) rb_link; - }; + /* + * List linkage, used by a variety of lists: + * - arena_bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; - /* Linkage for per size class sn/address-ordered heaps. */ + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ phn(extent_t) ph_link; union { @@ -148,7 +146,7 @@ struct extent_s { }; }; typedef ql_head(extent_t) extent_list_t; -typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_tree_t; typedef ph(extent_t) extent_heap_t; /* Quantized collection of extents, with built-in LRU queue. */ diff --git a/src/extent.c b/src/extent.c index 3f1c76f..221c80c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -117,7 +117,7 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, +ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) typedef enum { -- cgit v0.12 From 1245faae9052350a96dbcb22de7979bca566dbec Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Oct 2017 18:03:02 -0700 Subject: Power: disable the CPU_SPINWAIT macro. Quoting from https://github.com/jemalloc/jemalloc/issues/761 : [...] reading the Power ISA documentation[1], the assembly in [the CPU_SPINWAIT macro] isn't correct anyway (as @marxin points out): the setting of the program-priority register is "sticky", and we never undo the lowering. We could do something similar, but given that we don't have testing here in the first place, I'm inclined to simply not try. I'll put something up reverting the problematic commit tomorrow. [1] Book II, chapter 3 of the 2.07B or 3.0B ISA documents. 
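
For context, a minimal usage sketch of the spin_cpu_spinwait() wrapper introduced below. Everything except spin_cpu_spinwait() itself is illustrative (the lock, the names, and the use of C11 atomics and sched_yield are not jemalloc code); the point is that where HAVE_CPU_SPINWAIT is 0 the wrapper degrades to a harmless no-op, so callers keep the same bounded spin-then-yield shape on every architecture:

    #include <sched.h>
    #include <stdatomic.h>

    /* Assumes the spin_cpu_spinwait() wrapper added below is in scope. */
    static atomic_flag guard = ATOMIC_FLAG_INIT;

    static void
    spin_then_yield_lock(void) {
            unsigned iter = 0;
            while (atomic_flag_test_and_set_explicit(&guard,
                memory_order_acquire)) {
                    if (iter < 5) {
                            /* Bounded exponential backoff via the CPU hint. */
                            for (unsigned i = 0; i < (1U << iter); i++) {
                                    spin_cpu_spinwait();
                            }
                            iter++;
                    } else {
                            /* Spin budget exhausted; yield instead. */
                            sched_yield();
                    }
            }
    }
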
--- configure.ac | 7 +++---- include/jemalloc/internal/jemalloc_internal_defs.h.in | 2 ++ include/jemalloc/internal/spin.h | 12 +++++++++++- src/mutex.c | 3 ++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index ab2f41a..f957377 100644 --- a/configure.ac +++ b/configure.ac @@ -381,6 +381,7 @@ dnl CPU-specific settings. CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) + HAVE_CPU_SPINWAIT=1 if test "x${je_cv_msvc}" = "xyes" ; then AC_CACHE_VAL([je_cv_pause_msvc], [JE_COMPILABLE([pause instruction MSVC], [], @@ -399,13 +400,11 @@ case "${host_cpu}" in fi fi ;; - powerpc*) - AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) - CPU_SPINWAIT='__asm__ volatile("or 31,31,31")' - ;; *) + HAVE_CPU_SPINWAIT=0 ;; esac +AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT]) AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) case "${host_cpu}" in diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5fa7f51..31262fb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -33,6 +33,8 @@ * order to yield to another virtual CPU. */ #undef CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#undef HAVE_CPU_SPINWAIT /* * Number of significant bits in virtual addresses. This may be less than the diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index aded0fc..22804c6 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -8,12 +8,22 @@ typedef struct { } spin_t; static inline void +spin_cpu_spinwait() { +# if HAVE_CPU_SPINWAIT + CPU_SPINWAIT; +# else + volatile int x = 0; + x = x; +# endif +} + +static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; if (spin->iteration < 5) { for (i = 0; i < (1U << spin->iteration); i++) { - CPU_SPINWAIT; + spin_cpu_spinwait(); } spin->iteration++; } else { diff --git a/src/mutex.c b/src/mutex.c index a528ef0..3de7f44 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 @@ -53,7 +54,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { - CPU_SPINWAIT; + spin_cpu_spinwait(); if (!malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; -- cgit v0.12 From 79e83451ff262fbc4bf66059eae672286b5eb9f0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 Oct 2017 18:41:51 -0700 Subject: Enable a0 metadata thp on the 3rd base block. Since we allocate rtree nodes from a0's base, it's pushed to over 1 block on initialization right away, which makes the auto thp mode less effective on a0. We change a0 to make the switch on the 3rd block instead. 
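
Stated as a standalone rule: base 0 switches its metadata to transparent huge pages once it grows to a third block, while every other base switches once it grows to a second block. A hedged restatement follows (the helper name and signature are hypothetical; the actual logic lives in base_auto_thp_triggered() and base_auto_thp_switch() in the diff below):

    #include <stdbool.h>

    /*
     * Illustrative only: whether a base with the given index has crossed the
     * block-count threshold at which its blocks are madvised to THP.
     */
    static bool
    metadata_thp_switch_reached(unsigned base_ind, unsigned n_blocks) {
            /* a0 also hosts the rtree nodes, so give it one extra block. */
            unsigned threshold = (base_ind == 0) ? 3 : 2;
            return n_blocks >= threshold;
    }
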
--- src/base.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/base.c b/src/base.c index 609a445..c6db425 100644 --- a/src/base.c +++ b/src/base.c @@ -126,10 +126,58 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, } static bool -base_is_single_block(base_t *base) { - assert(base->blocks != NULL && - (base->blocks->size & HUGEPAGE_MASK) == 0); - return (base->blocks->next == NULL); +base_auto_thp_triggered(base_t *base, bool with_new_block) { + assert(opt_metadata_thp == metadata_thp_auto); + base_block_t *b1 = base->blocks; + assert(b1 != NULL); + + base_block_t *b2 = b1->next; + if (base_ind_get(base) != 0) { + return with_new_block ? true: b2 != NULL; + } + + base_block_t *b3 = (b2 != NULL) ? b2->next : NULL; + return with_new_block ? b2 != NULL : b3 != NULL; +} + +static void +base_auto_thp_switch(base_t *base) { + assert(opt_metadata_thp == metadata_thp_auto); + + base_block_t *b1 = base->blocks; + assert(b1 != NULL); + base_block_t *b2 = b1->next; + + /* Called when adding a new block. */ + bool should_switch; + if (base_ind_get(base) != 0) { + /* Makes the switch on the 2nd block. */ + should_switch = (b2 == NULL); + } else { + /* + * a0 switches to thp on the 3rd block, since rtree nodes are + * allocated from a0 base, which takes an entire block on init. + */ + base_block_t *b3 = (b2 != NULL) ? b2->next : + NULL; + should_switch = (b2 != NULL) && (b3 == NULL); + } + if (!should_switch) { + return; + } + + assert(base->n_thp == 0); + /* Make the initial blocks THP lazily. */ + base_block_t *block = base->blocks; + while (block != NULL) { + assert((block->size & HUGEPAGE_MASK) == 0); + pages_huge(block, block->size); + if (config_stats) { + base->n_thp += block->size >> LG_HUGEPAGE; + } + block = block->next; + assert(block == NULL || (base_ind_get(base) == 0)); + } } static void * @@ -174,8 +222,8 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - if (metadata_thp_madvise() && (!base_is_single_block(base) || - opt_metadata_thp == metadata_thp_always)) { + if (metadata_thp_madvise() && (opt_metadata_thp == + metadata_thp_always || base_auto_thp_triggered(base, false))) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> LG_HUGEPAGE; @@ -233,21 +281,15 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, void *addr = (void *)block; assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (block_size & HUGEPAGE_MASK) == 0); - /* base == NULL indicates this is a new base. */ - if (base != NULL || opt_metadata_thp == metadata_thp_always) { - /* Use hugepage for the new block. */ + if (opt_metadata_thp == metadata_thp_always) { pages_huge(addr, block_size); - } - if (base != NULL && base_is_single_block(base) && - opt_metadata_thp == metadata_thp_auto) { - /* Make the first block THP lazily. */ - base_block_t *first_block = base->blocks; - assert((first_block->size & HUGEPAGE_MASK) == 0); - pages_huge(first_block, first_block->size); - if (config_stats) { - assert(base->n_thp == 0); - base->n_thp += first_block->size >> LG_HUGEPAGE; + } else if (opt_metadata_thp == metadata_thp_auto && + base != NULL) { + /* base != NULL indicates this is not a new base. 
*/ + if (base_auto_thp_triggered(base, true)) { + pages_huge(addr, block_size); } + base_auto_thp_switch(base); } } @@ -287,8 +329,9 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; - if (metadata_thp_madvise()) { - assert(!base_is_single_block(base)); + if (metadata_thp_madvise() && + !(opt_metadata_thp == metadata_thp_auto + && !base_auto_thp_triggered(base, false))) { assert(base->n_thp > 0); base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE; -- cgit v0.12 From a2e6eb2c226ff63397220517883e13717f97da05 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 Oct 2017 16:39:33 -0700 Subject: Delay background_thread_ctl_init to right before thread creation. ctl_init sets isthreaded, which means it should be done without holding any locks. --- src/background_thread.c | 3 --- src/jemalloc.c | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index eb30eb5..609be52 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -848,9 +848,6 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } - if (opt_background_thread) { - background_thread_ctl_init(tsdn); - } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); diff --git a/src/jemalloc.c b/src/jemalloc.c index 28e604b..f29fc7d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1522,6 +1522,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + witness_assert_lockless(witness_tsd_tsdn( + tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); @@ -1529,8 +1531,11 @@ malloc_init_hard(void) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock. */ + background_thread_ctl_init(tsd_tsdn(tsd)); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); bool err = background_thread_create(tsd, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); -- cgit v0.12 From 7e74093c96c019ce52aee9a03fc745647d79ca5f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 14:56:49 -0700 Subject: Set isthreaded manually. Avoid relying pthread_once which creates dependency during init. 
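
The dependency being avoided, illustrated with hypothetical names (this is not jemalloc's bootstrap code): pthread_once() pulls in libpthread's own synchronization, which is unwelcome while the allocator is still initializing, and the only work needed here is flipping a boolean, so a plain idempotent assignment suffices:

    #include <stdbool.h>

    static bool threads_active = false;    /* plays the role of isthreaded */

    static void
    mark_threaded(void) {
            /*
             * Idempotent: repeating the store is harmless, so no once-guard
             * (and hence no call into libpthread) is required.
             */
            if (!threads_active) {
                    threads_active = true;
            }
    }
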
--- src/background_thread.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index 609be52..6baff22 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -30,19 +30,20 @@ bool can_enable_background_thread; static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -static pthread_once_t once_control = PTHREAD_ONCE_INIT; static void -pthread_create_wrapper_once(void) { +pthread_create_wrapper_init(void) { #ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; + if (!isthreaded) { + isthreaded = true; + } #endif } int pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *__restrict arg) { - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); return pthread_create_fptr(thread, attr, start_routine, arg); } @@ -805,7 +806,7 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); #endif } -- cgit v0.12 From fc83de0384a2ad87cf5059d4345acf014c77e6e4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Oct 2017 15:51:35 -0700 Subject: Document the potential issues about opt.background_thread. --- doc/jemalloc.xml.in | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index dda9a73..8151b5b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1010,9 +1010,12 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", (const bool) r- - Internal background worker threads enabled/disabled. See - background_thread for dynamic - control options and details. This option is disabled by + Internal background worker threads enabled/disabled. + Because of potential circular dependencies, enabling background thread + using this option may cause crash or deadlock during initialization. For + a reliable way to use this feature, see background_thread for dynamic control + options and details. This option is disabled by default. -- cgit v0.12 From 31ab38be5f3c4b826db89ff3cd4f32f988747f06 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 16:28:55 -0700 Subject: Define MADV_FREE on our own when needed. On x86 Linux, we define our own MADV_FREE if madvise(2) is available, but no MADV_FREE is detected. This allows the feature to be built in and enabled with runtime detection. --- configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/jemalloc_preamble.h.in | 4 ++++ src/pages.c | 8 +++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f957377..b4c66fb 100644 --- a/configure.ac +++ b/configure.ac @@ -1824,6 +1824,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then ], [je_cv_madv_free]) if test "x${je_cv_madv_free}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + elif test "x${je_cv_madvise}" = "xyes" ; then + case "${host_cpu}" in i686|x86_64) + case "${host}" in *-*-linux*) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ]) + ;; + esac + ;; + esac fi dnl Check for madvise(..., MADV_DONTNEED). 
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 31262fb..b56f21f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -285,6 +285,9 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +#undef JEMALLOC_DEFINE_MADVISE_FREE + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index f6ed731..f81f3a4 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -47,6 +47,10 @@ #endif #include "jemalloc/internal/hooks.h" +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif + static const bool config_debug = #ifdef JEMALLOC_DEBUG true diff --git a/src/pages.c b/src/pages.c index 8469188..e8112f7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -269,7 +269,13 @@ pages_purge_lazy(void *addr, size_t size) { VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - return (madvise(addr, size, MADV_FREE) != 0); + return (madvise(addr, size, +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); -- cgit v0.12 From f4f814cd4cca4be270c22c4e943cd5ae6c40fea9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 16:32:28 -0700 Subject: Remove the default value for JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS. --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b4c66fb..558489c 100644 --- a/configure.ac +++ b/configure.ac @@ -561,7 +561,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) @@ -575,7 +575,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) -- cgit v0.12 From 33df2fa1694c9fdc1912aecaa19babc194f377ac Mon Sep 17 00:00:00 2001 From: rustyx Date: Mon, 16 Oct 2017 16:40:50 +0200 Subject: Fix MSVC 2015 project and add a VS 2017 solution --- .gitignore | 2 + msvc/ReadMe.txt | 7 +- msvc/jemalloc_vc2017.sln | 63 ++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + msvc/projects/vc2015/test_threads/test_threads.cpp | 88 ------ msvc/projects/vc2015/test_threads/test_threads.h | 3 - .../vc2015/test_threads/test_threads.vcxproj | 6 +- .../test_threads/test_threads.vcxproj.filters | 6 +- .../vc2015/test_threads/test_threads_main.cpp | 11 - msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 345 +++++++++++++++++++++ .../vc2017/jemalloc/jemalloc.vcxproj.filters | 95 ++++++ .../vc2017/test_threads/test_threads.vcxproj | 326 +++++++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ msvc/test_threads/test_threads.cpp | 88 ++++++ msvc/test_threads/test_threads.h | 3 + msvc/test_threads/test_threads_main.cpp | 11 + 17 files changed, 972 insertions(+), 112 deletions(-) create mode 100644 msvc/jemalloc_vc2017.sln delete mode 100644 msvc/projects/vc2015/test_threads/test_threads.cpp delete mode 100644 msvc/projects/vc2015/test_threads/test_threads.h delete mode 100644 msvc/projects/vc2015/test_threads/test_threads_main.cpp create mode 100644 msvc/projects/vc2017/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2017/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters create mode 100644 msvc/test_threads/test_threads.cpp create mode 100644 msvc/test_threads/test_threads.h create mode 100644 msvc/test_threads/test_threads_main.cpp diff --git a/.gitignore b/.gitignore index a25aaf7..19199cc 100644 --- a/.gitignore +++ b/.gitignore @@ -77,12 +77,14 @@ test/include/test/jemalloc_test_defs.h *.pdb *.sdf *.opendb +*.VC.db *.opensdf *.cachefile *.suo *.user *.sln.docstates *.tmp +.vs/ /msvc/Win32/ /msvc/x64/ /msvc/projects/*/*/Debug*/ diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 77d567d..633a7d4 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -9,16 +9,15 @@ How to build jemalloc for Windows * grep * sed -2. Install Visual Studio 2015 with Visual C++ +2. Install Visual Studio 2015 or 2017 with Visual C++ 3. Add Cygwin\bin to the PATH environment variable -4. Open "VS2015 x86 Native Tools Command Prompt" +4. Open "x64 Native Tools Command Prompt for VS 2017" (note: x86/x64 doesn't matter at this point) 5. Generate header files: sh -c "CC=cl ./autogen.sh" 6. 
Now the project can be opened and built in Visual Studio: - msvc\jemalloc_vc2015.sln - + msvc\jemalloc_vc2017.sln diff --git a/msvc/jemalloc_vc2017.sln b/msvc/jemalloc_vc2017.sln new file mode 100644 index 0000000..c22fcb4 --- /dev/null +++ b/msvc/jemalloc_vc2017.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2017\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2017\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 97f892e..78f92c9 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -48,6 +48,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d2de135..dba976e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,5 +88,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp deleted file mode 100644 index 92e3162..0000000 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// jemalloc C++ threaded test -// Author: Rustam Abdullaev -// Public Domain - -#include -#include -#include -#include -#include -#include -#include -#include - -using std::vector; -using std::thread; -using std::uniform_int_distribution; -using std::minstd_rand; - -int test_threads() { - je_malloc_conf = "narenas:3"; - int narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated1; - size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); - printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated2; - je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); - size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %zd bytes\n", leaked); - bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) - printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); - printf("\nPress Enter to continue...\n"); - getchar(); - return failed ? 1 : 0; -} diff --git a/msvc/projects/vc2015/test_threads/test_threads.h b/msvc/projects/vc2015/test_threads/test_threads.h deleted file mode 100644 index 64d0cdb..0000000 --- a/msvc/projects/vc2015/test_threads/test_threads.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -int test_threads(); diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj index f5e9898..325876d 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -310,8 +310,8 @@ - - + + @@ -319,7 +319,7 @@ - + diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters index 4c23340..fa4588f 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -11,15 +11,15 @@ - + Source Files - + Source Files - + Header Files diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/projects/vc2015/test_threads/test_threads_main.cpp deleted file mode 100644 index 0a022fb..0000000 --- a/msvc/projects/vc2015/test_threads/test_threads_main.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "test_threads.h" -#include -#include -#include - -using namespace std::chrono_literals; - -int main(int argc, char** argv) { - int rc = test_threads(); - return rc; -} diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj new file mode 100644 index 0000000..e49dbbd --- /dev/null +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -0,0 +1,345 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v141 + MultiByte + + + StaticLibrary + true + v141 + MultiByte + + + DynamicLibrary + false + v141 + true + MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + DynamicLibrary + true + v141 + MultiByte + + + StaticLibrary + true + v141 + MultiByte + + + DynamicLibrary + false + v141 + true + MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 0000000..dba976e --- /dev/null +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,95 @@ + + + + + 
{4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/test_threads/test_threads.vcxproj b/msvc/projects/vc2017/test_threads/test_threads.vcxproj new file mode 100644 index 0000000..c35b0f5 --- /dev/null +++ b/msvc/projects/vc2017/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v141 + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + 
jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters new file mode 100644 index 0000000..fa4588f --- /dev/null +++ b/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp new file mode 100644 
index 0000000..92e3162 --- /dev/null +++ b/msvc/test_threads/test_threads.cpp @@ -0,0 +1,88 @@ +// jemalloc C++ threaded test +// Author: Rustam Abdullaev +// Public Domain + +#include +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::thread; +using std::uniform_int_distribution; +using std::minstd_rand; + +int test_threads() { + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j += 64) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j += 64) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %zd bytes\n", leaked); + bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 
1 : 0; +} diff --git a/msvc/test_threads/test_threads.h b/msvc/test_threads/test_threads.h new file mode 100644 index 0000000..64d0cdb --- /dev/null +++ b/msvc/test_threads/test_threads.h @@ -0,0 +1,3 @@ +#pragma once + +int test_threads(); diff --git a/msvc/test_threads/test_threads_main.cpp b/msvc/test_threads/test_threads_main.cpp new file mode 100644 index 0000000..0a022fb --- /dev/null +++ b/msvc/test_threads/test_threads_main.cpp @@ -0,0 +1,11 @@ +#include "test_threads.h" +#include +#include +#include + +using namespace std::chrono_literals; + +int main(int argc, char** argv) { + int rc = test_threads(); + return rc; +} -- cgit v0.12 From 5bad01c38ed0b1f647a6984c5f830b124cafdc94 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 28 Aug 2017 18:27:12 -0700 Subject: Document some of the internal extent functions. --- src/extent.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/extent.c b/src/extent.c index 221c80c..fd8eab6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -723,6 +723,13 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { assert(!err); } +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent) { @@ -737,6 +744,9 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +/* + * Removes all pointers to the given extent from the global rtree. + */ static void extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; @@ -760,6 +770,10 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { } } +/* + * Tries to find and remove an extent from extents that can be used for the + * given allocation request. + */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, @@ -832,6 +846,12 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. + */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, @@ -892,6 +912,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given extents_t. + */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, @@ -1442,6 +1466,10 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Does the metadata management portions of putting an unused extent into the + * given extents_t (coalesces, deregisters slab interiors, the heap operations). 
+ */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { @@ -1800,6 +1828,13 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). + */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, -- cgit v0.12 From 211b1f3c7de23b1915f1ce8f9277e6c1ff60cfde Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 29 Aug 2017 16:50:57 -0700 Subject: Factor out extent-splitting core from extent lifetime management. Before this commit, extent_recycle_split intermingles the splitting of an extent and the return of parts of that extent to a given extents_t. After it, that logic is separated. This will enable splitting extents that don't live in any extents_t (as the grow retained region soon will). --- src/extent.c | 230 ++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 149 insertions(+), 81 deletions(-) diff --git a/src/extent.c b/src/extent.c index fd8eab6..1dd1d1d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -847,69 +847,147 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } /* - * This fulfills the indicated allocation request out of the given extent (which - * the caller should have ensured was big enough). If there's any unused space - * before or after the resulting allocation, that space is given its own extent - * and put back into extents. + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any extents_t; it's the caller's + * job to ensure that they can be reused. */ -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. 
*/ + extent_t **to_leak, extent_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { + szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + esize); - size_t trailsize = extent_size_get(extent) - leadsize - esize; + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; /* Split the lead. */ if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, false, esize + trailsize, szind, + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *lead, leadsize, NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, extents, - lead, growing_retained); - return NULL; + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, lead, false); } /* Split the trail. */ if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false, growing_retained); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, extents, - extent, growing_retained); - return NULL; + *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + esize, szind, slab, trailsize, NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, trail, false); - } else if (leadsize == 0) { + } + + if (leadsize == 0 && trailsize == 0) { /* * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_szind_set(extent, szind); + extent_szind_set(*extent, szind); if (szind != NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent) - + (uintptr_t)extent_past_get(*extent) - (uintptr_t)PAGE, szind, slab); } } } - return extent; + return extent_split_interior_ok; +} + +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. 
+ */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, extent_t *extent, bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, extents, lead, false); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, extents, trail, false); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + extents_leak(tsdn, arena, r_extent_hooks, extents, + to_leak, growing_retained); + } + return NULL; + } + unreachable(); } /* @@ -1140,10 +1218,6 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, goto label_err; } - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, - PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(alloc_size >= leadsize + esize); - size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } @@ -1151,54 +1225,48 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, *commit = true; } - /* Split the lead. */ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab, - true); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, + true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, lead, true); - goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); - } - - /* Split the trail. */ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, - extent, esize, szind, slab, trailsize, NSIZES, false, true); - if (trail == NULL) { - extent_deregister(tsdn, extent); + if (trail != NULL) { + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail, true); + } + } else { + /* + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. 
+ */ + assert(result == extent_split_interior_error); + if (to_leak != NULL) { + extent_deregister(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); + &arena->extents_retained, to_leak, true); goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); - } else if (leadsize == 0) { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * Note: we don't handle the non-NULL to_salvage case at all. + * This maintains the behavior that was present when the + * refactor pulling extent_split_interior into a helper function + * was added. I think this is actually a bug (we leak both the + * memory and the extent_t in that case), but since this code is + * getting deleted very shortly (in a subsequent commit), + * ensuring correctness down this path isn't worth the effort. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); - } - } } if (*commit && !extent_committed_get(extent)) { -- cgit v0.12 From ccd09050aa53d083fe0b45d4704b1fe95fb00c92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 15:03:52 -0700 Subject: Add configure-time detection for madvise(..., MADV_DO[NT]DUMP) --- configure.ac | 11 +++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/configure.ac b/configure.ac index 558489c..7544f57 100644 --- a/configure.ac +++ b/configure.ac @@ -1845,6 +1845,17 @@ if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) fi + dnl Check for madvise(..., MADV_DO[NT]DUMP). + JE_COMPILABLE([madvise(..., MADV_DO[[NT]]DUMP)], [ +#include +], [ + madvise((void *)0, 0, MADV_DONTDUMP); + madvise((void *)0, 0, MADV_DODUMP); +], [je_cv_madv_dontdump]) + if test "x${je_cv_madv_dontdump}" = "xyes" ; then + AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ]) + fi + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b56f21f..aadfbed 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -289,6 +289,11 @@ #undef JEMALLOC_DEFINE_MADVISE_FREE /* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#undef JEMALLOC_MADVISE_DONTDUMP + +/* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. */ -- cgit v0.12 From bbaa72422bb086933890a125fd58bf199fe26f2d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 15:10:01 -0700 Subject: Add pages_dontdump and pages_dodump. This will, eventually, enable us to avoid dumping eden regions. 
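For illustration only (not part of this patch): the pages_dontdump()/pages_dodump() wrappers in the diff below reduce to madvise(2) calls on page-aligned ranges. A minimal standalone sketch of that underlying pattern, assuming a Linux kernel that supports MADV_DONTDUMP/MADV_DODUMP and using a placeholder 1 MiB mapping:

#define _DEFAULT_SOURCE
#include <sys/mman.h>
#include <stdio.h>

int
main(void) {
	size_t len = 1 << 20;	/* 1 MiB; mmap returns page-aligned memory. */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Exclude the range from core dumps. */
	if (madvise(p, len, MADV_DONTDUMP) != 0) {
		perror("madvise(MADV_DONTDUMP)");
	}
	/* Re-include it before handing it to a consumer that should be dumped. */
	if (madvise(p, len, MADV_DODUMP) != 0) {
		perror("madvise(MADV_DODUMP)");
	}
	munmap(p, len);
	return 0;
}

In the patch itself the calls are compiled in only when the configure-time JEMALLOC_MADVISE_DONTDUMP check (added in the previous commit) succeeds; otherwise pages_dontdump()/pages_dodump() are no-ops that return false.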
--- include/jemalloc/internal/pages.h | 2 ++ src/pages.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 121fff3..dff2051 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -69,6 +69,8 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_dontdump(void *addr, size_t size); +bool pages_dodump(void *addr, size_t size); bool pages_boot(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/pages.c b/src/pages.c index e8112f7..5e1043d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -328,6 +328,29 @@ pages_nohuge(void *addr, size_t size) { #endif } +bool +pages_dontdump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DONTDUMP) != 0; +#else + return false; +#endif +} + +bool +pages_dodump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DODUMP) != 0; +#else + return false; +#endif +} + + static size_t os_page_detect(void) { #ifdef _WIN32 -- cgit v0.12 From d14bbf8d8190df411f0daf182f73f7b7786288c4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 17:25:57 -0700 Subject: Add a "dumpable" bit to the extent state. Currently, this is unused (i.e. all extents are always marked dumpable). In the future, we'll begin using this functionality. --- include/jemalloc/internal/extent_inlines.h | 16 ++++++++++++- include/jemalloc/internal/extent_structs.h | 36 ++++++++++++++++++++++++------ src/extent.c | 19 +++++++++++----- src/extent_dss.c | 5 +++-- test/unit/rtree.c | 8 +++---- test/unit/slab.c | 2 +- 6 files changed, 65 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index bb2bd69..610072e 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -94,6 +94,12 @@ extent_committed_get(const extent_t *extent) { } static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool extent_slab_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> EXTENT_BITS_SLAB_SHIFT); @@ -270,6 +276,12 @@ extent_committed_set(extent_t *extent, bool committed) { } static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void extent_slab_set(extent_t *extent, bool slab) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); @@ -283,7 +295,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed) { + bool committed, bool dumpable) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -295,6 +307,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t 
size, extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); ql_elm_new(extent, ql_link); if (config_prof) { extent_prof_tctx_set(extent, NULL); @@ -312,6 +325,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_state_set(extent, extent_state_active); extent_zeroed_set(extent, true); extent_committed_set(extent, true); + extent_dumpable_set(extent, true); } static inline void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 641a632..722963b 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -23,13 +23,14 @@ struct extent_s { * a: arena_ind * b: slab * c: committed + * d: dumpable * z: zeroed * t: state * i: szind * f: nfree * n: sn * - * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -44,6 +45,23 @@ struct extent_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. * @@ -80,25 +98,29 @@ struct extent_s { #define EXTENT_BITS_COMMITTED_MASK \ ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_DUMPABLE_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_DUMPABLE_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 3) #define EXTENT_BITS_ZEROED_MASK \ ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) +#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 4) #define EXTENT_BITS_STATE_MASK \ ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) +#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 6) #define EXTENT_BITS_SZIND_MASK \ (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) #define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) + (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES) #define EXTENT_BITS_NFREE_MASK \ ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) #define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) + (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. 
*/ @@ -128,7 +150,7 @@ struct extent_s { */ ql_elm(extent_t) ql_link; - /* + /* * Linkage for per size class sn/address-ordered heaps, and * for extent_avail */ diff --git a/src/extent.c b/src/extent.c index 1dd1d1d..497f4e4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -449,8 +449,10 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + new_addr, size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; } void @@ -458,6 +460,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1207,11 +1210,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed); + committed, true); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; } + if (extent_register_no_gdump_add(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, extent, true); @@ -1374,7 +1378,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit); + arena_extent_sn_next(arena), extent_state_active, zero, commit, + true); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -1412,6 +1417,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, new_addr, size, pad, alignment, slab, szind, zero, commit); } + assert(extent == NULL || extent_dumpable_get(extent)); return extent; } @@ -1636,6 +1642,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1926,7 +1933,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1937,7 +1944,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); diff --git a/src/extent_dss.c b/src/extent_dss.c index e72da95..2b1ea9c 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -156,7 +156,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(gap, arena, 
gap_addr_page, gap_size_page, false, NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true); + extent_state_active, false, true, true); } /* * Compute the address just past the end of the desired @@ -199,7 +199,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(&extent, arena, ret, size, size, false, NSIZES, - extent_state_active, false, true); + extent_state_active, false, true, + true); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 814837b..908100f 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -87,9 +87,9 @@ TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false, sz_size2index(LARGE_MINCLASS), 0, extent_state_active, false, - false); + false, true); extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); tsdn_t *tsdn = tsdn_fetch(); @@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_bits) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -167,7 +167,7 @@ TEST_BEGIN(test_rtree_random) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 6f40aee..ea344f8 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -9,7 +9,7 @@ TEST_BEGIN(test_arena_slab_regind) { const arena_bin_info_t *bin_info = &arena_bin_info[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true); + binind, 0, extent_state_active, false, true, true); assert_ptr_not_null(extent_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { -- cgit v0.12 From 47203d5f422452def4cb29c0b7128cc068031100 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Oct 2017 12:01:20 -0700 Subject: Output all counters for bin mutex stats. The saved space is not worth the trouble of missing counters. --- src/stats.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index cbeb923..0847f39 100644 --- a/src/stats.c +++ b/src/stats.c @@ -131,7 +131,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\"bins\": [\n"); } else { char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq total_wait_ns max_wait_ns\n"; + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_thds\n"; malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs curslabs regs" @@ -234,16 +235,18 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nregs, slab_size / page, util, nfills, nflushes, nslabs, nreslabs); - /* Output less info for bin mutexes to save space. 
*/ if (mutex) { malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %12"FMTu64"\n", + " %14"FMTu64" %14"FMTu64" %12"FMTu64 + " %10"FMTu64"\n", mutex_stats[mutex_counter_num_ops], mutex_stats[mutex_counter_num_wait], mutex_stats[mutex_counter_num_spin_acq], + mutex_stats[mutex_counter_num_owner_switch], mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time]); + mutex_stats[mutex_counter_max_wait_time], + mutex_stats[mutex_counter_max_num_thds]); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } -- cgit v0.12 From 58eba024c0fbda463eaf8b42772407894dba6eff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 31 Oct 2017 14:17:40 -0700 Subject: metadata_thp: auto mode adjustment for a0. We observed that arena 0 can have much more metadata allocated comparing to other arenas. Tune the auto mode to only switch to huge page on the 5th block (instead of 3 previously) for a0. --- include/jemalloc/internal/base_types.h | 9 ++++++++ src/base.c | 41 ++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index 97e38a9..b6db77d 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -6,6 +6,15 @@ typedef struct base_s base_t; #define METADATA_THP_DEFAULT metadata_thp_disabled +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + typedef enum { metadata_thp_disabled = 0, /* diff --git a/src/base.c b/src/base.c index c6db425..1036936 100644 --- a/src/base.c +++ b/src/base.c @@ -125,42 +125,45 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static size_t +base_get_num_blocks(base_t *base, bool with_new_block) { + base_block_t *b = base->blocks; + assert(b != NULL); + + size_t n_blocks = with_new_block ? 2 : 1; + while (b->next != NULL) { + n_blocks++; + b = b->next; + } + + return n_blocks; +} + static bool base_auto_thp_triggered(base_t *base, bool with_new_block) { assert(opt_metadata_thp == metadata_thp_auto); - base_block_t *b1 = base->blocks; - assert(b1 != NULL); - base_block_t *b2 = b1->next; if (base_ind_get(base) != 0) { - return with_new_block ? true: b2 != NULL; + return base_get_num_blocks(base, with_new_block) >= + BASE_AUTO_THP_THRESHOLD; } - base_block_t *b3 = (b2 != NULL) ? b2->next : NULL; - return with_new_block ? b2 != NULL : b3 != NULL; + return base_get_num_blocks(base, with_new_block) >= + BASE_AUTO_THP_THRESHOLD_A0; } static void base_auto_thp_switch(base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); - base_block_t *b1 = base->blocks; - assert(b1 != NULL); - base_block_t *b2 = b1->next; - /* Called when adding a new block. */ bool should_switch; if (base_ind_get(base) != 0) { - /* Makes the switch on the 2nd block. */ - should_switch = (b2 == NULL); + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD); } else { - /* - * a0 switches to thp on the 3rd block, since rtree nodes are - * allocated from a0 base, which takes an entire block on init. - */ - base_block_t *b3 = (b2 != NULL) ? 
b2->next : - NULL; - should_switch = (b2 != NULL) && (b3 == NULL); + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD_A0); } if (!should_switch) { return; -- cgit v0.12 From d591df05c86e89c0a5db98274bc7f280f910a0de Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sun, 22 Oct 2017 12:04:59 +0100 Subject: Use getpagesize(3) under FreeBSD. This avoids sysctl(2) syscall during binary startup, using the value passed in the ELF aux vector instead. Signed-off-by: Edward Tomasz Napierala --- src/pages.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pages.c b/src/pages.c index 5e1043d..14e63f9 100644 --- a/src/pages.c +++ b/src/pages.c @@ -357,6 +357,8 @@ os_page_detect(void) { SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { -- cgit v0.12 From 9f455e2786685b443201c33119765c8093461174 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 26 Oct 2017 16:55:43 +0100 Subject: Try to use sysctl(3) instead of sysctlbyname(3). This attempts to use VM_OVERCOMMIT OID - newly introduced in -CURRENT few days ago, specifically for this purpose - instead of querying the sysctl by its string name. Due to how syctlbyname(3) works, this means we do one syscall during binary startup instead of two. Signed-off-by: Edward Tomasz Napierala --- src/pages.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/pages.c b/src/pages.c index 14e63f9..c839471 100644 --- a/src/pages.c +++ b/src/pages.c @@ -10,6 +10,9 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include +#ifdef __FreeBSD__ +#include +#endif #endif /******************************************************************************/ @@ -375,9 +378,19 @@ os_overcommits_sysctl(void) { size_t sz; sz = sizeof(vm_overcommit); +#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) + int mib[2]; + + mib[0] = CTL_VM; + mib[1] = VM_OVERCOMMIT; + if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { + return false; /* Error. */ + } +#else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } +#endif return ((vm_overcommit & 0x3) == 0); } -- cgit v0.12 From e422fa8e7ea749ab8c4783e405c0f4b19ac25db9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 Nov 2017 17:48:39 -0700 Subject: Add arena.i.retain_grow_limit This option controls the max size when grow_retained. This is useful when we have customized extent hooks reserving physical memory (e.g. 1G huge pages). Without this feature, the default increasing sequence could result in fragmented and wasted physical memory. --- doc/jemalloc.xml.in | 16 ++++++++++ include/jemalloc/internal/arena_externs.h | 2 ++ include/jemalloc/internal/arena_structs_b.h | 5 +++ include/jemalloc/internal/extent_types.h | 2 ++ src/arena.c | 28 +++++++++++++++++ src/ctl.c | 42 +++++++++++++++++++++++-- src/extent.c | 7 +++-- test/unit/mallctl.c | 49 +++++++++++++++++++++++++++++ 8 files changed, 146 insertions(+), 5 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8151b5b..895b2d4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1683,6 +1683,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.retain_grow_limit + (size_t) + rw + + Maximum size to grow retained region (only relevant when + opt.retain is + enabled). This controls the maximum increment to expand virtual memory, + or allocation through arena.<i>extent_hooks. 
+ In particular, if customized extent hooks reserve physical memory + (e.g. 1G huge pages), this is useful to control the allocation hook's + input size. The default is no limit. + + arena.<i>.extent_hooks diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4e546c3..5a0e3ad 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -77,6 +77,8 @@ ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, + size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index c4e4310..f74ea97 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -240,9 +240,14 @@ struct arena_s { * be effective even if multiple arenas' extent allocation requests are * highly interleaved. * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * * Synchronization: extent_grow_mtx */ pszind_t extent_grow_next; + pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; /* diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index b6905ce..7efcd3a 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -6,4 +6,6 @@ typedef struct extents_s extents_t; #define EXTENT_HOOKS_INITIALIZER NULL +#define EXTENT_GROW_MAX_PIND (NPSIZES - 1) + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/src/arena.c b/src/arena.c index 43ba601..91dce1f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1886,6 +1886,33 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { return false; } +bool +arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, + size_t *new_limit) { + assert(opt_retain); + + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. 
*/ + if ((new_ind = sz_psz2ind(limit + 1) - 1) > + EXTENT_GROW_MAX_PIND) { + return true; + } + } + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(arena->retain_grow_limit); + } + if (new_limit != NULL) { + arena->retain_grow_limit = new_ind; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + + return false; +} + unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); @@ -2013,6 +2040,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); + arena->retain_grow_limit = EXTENT_GROW_MAX_PIND; if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/ctl.c b/src/ctl.c index a2f3837..11cd68d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -118,6 +118,7 @@ CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) +CTL_PROTO(arena_i_retain_grow_limit) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -320,7 +321,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -2199,6 +2201,42 @@ label_return: return ret; } +static int +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + arena_t *arena; + + if (!opt_retain) { + /* Only relevant when retain is enabled. */ + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + size_t old_limit, new_limit; + if (newp != NULL) { + WRITE(new_limit, size_t); + } + bool err = arena_retain_grow_limit_get_set(tsd, arena, + &old_limit, newp != NULL ? &new_limit : NULL); + if (!err) { + READ(old_limit, size_t); + ret = 0; + } else { + ret = EFAULT; + } + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; @@ -2260,7 +2298,7 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; diff --git a/src/extent.c b/src/extent.c index 497f4e4..d1324f9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1284,13 +1284,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } /* - * Increment extent_grow_next if doing so wouldn't exceed the legal + * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. 
*/ - if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { arena->extent_grow_next += egn_skip + 1; } else { - arena->extent_grow_next = NPSIZES - 1; + arena->extent_grow_next = arena->retain_grow_limit; } /* All opportunities for failure are past. */ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5612cce..94f801e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -556,6 +556,54 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END +TEST_BEGIN(test_arena_i_retain_grow_limit) { + size_t old_limit, new_limit, default_limit; + size_t mib[3]; + size_t miblen; + + bool retain_enabled; + size_t sz = sizeof(retain_enabled); + assert_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + test_skip_if(!retain_enabled); + + sz = sizeof(default_limit); + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), + 0, "Unexpected mallctlnametomib() error"); + + assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(default_limit, sz_pind2sz(EXTENT_GROW_MAX_PIND), + "Unexpected default for retain_grow_limit"); + + new_limit = PAGE - 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), EFAULT, "Unexpected mallctl() success"); + + new_limit = PAGE + 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(old_limit, PAGE, + "Unexpected value for retain_grow_limit"); + + /* Expect grow less than psize class 10. */ + new_limit = sz_pind2sz(10) - 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(old_limit, sz_pind2sz(9), + "Unexpected value for retain_grow_limit"); + + /* Restore to default. */ + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, + sizeof(default_limit)), 0, "Unexpected mallctl() failure"); +} +TEST_END + TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); @@ -727,6 +775,7 @@ main(void) { test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, + test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, -- cgit v0.12 From 6dd5681ab787b4153ad2fa425be72efece42d3c7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 Nov 2017 13:58:59 -0700 Subject: Use hugepage alignment for base allocator. This gives us an easier way to tell if the allocation is for metadata in the extent hooks. --- src/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base.c b/src/base.c index 1036936..e3a89b0 100644 --- a/src/base.c +++ b/src/base.c @@ -33,9 +33,9 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) bool zero = true; bool commit = true; - /* We use hugepage sizes regardless of opt_metadata_thp. */ + /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - size_t alignment = metadata_thp_enabled() ? 
HUGEPAGE : PAGE; + size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { -- cgit v0.12 From b5d071c26697813bcceae320ba88dee2a2a73e51 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Nov 2017 13:59:21 -0800 Subject: Fix unbounded increase in stash_decayed. Added an upper bound on how many pages we can decay during the current run. Without this, decay could have unbounded increase in stashed, since other threads could add new pages into the extents. --- include/jemalloc/internal/extent_externs.h | 3 ++- src/arena.c | 30 ++++++++++++++++++------------ src/extent.c | 5 +++-- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 9da5d01..132d890 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -36,7 +36,8 @@ extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min, + size_t npages_max); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); diff --git a/src/arena.c b/src/arena.c index 91dce1f..e2462bf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -62,7 +62,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, - bool is_background_thread); + size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, @@ -693,7 +693,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, is_background_thread); + npages_limit, current_npages - npages_limit, + is_background_thread); } } @@ -799,7 +800,8 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, is_background_thread); + 0, extents_npages_get(extents), + is_background_thread); } return false; } @@ -901,15 +903,16 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - extent_list_t *decay_extents) { + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. 
*/ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - npages_limit)) != NULL) { + while (nstashed < npages_decay_max && + (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + npages_limit, npages_decay_max - nstashed)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -983,12 +986,15 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * npages_limit: Decay as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= npages_limit) + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, + extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -1006,7 +1012,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, &decay_extents); + npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, @@ -1024,7 +1030,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - is_background_thread); + extents_npages_get(extents), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1220,7 +1226,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0)) != NULL) { + &arena->extents_retained, 0, SIZE_MAX)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } diff --git a/src/extent.c b/src/extent.c index d1324f9..c8a3090 100644 --- a/src/extent.c +++ b/src/extent.c @@ -472,7 +472,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min) { + extents_t *extents, size_t npages_min, size_t npages_max) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -493,7 +493,8 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min) { + if (extents_npages - npages < npages_min || + npages > npages_max) { extent = NULL; goto label_return; } -- cgit v0.12 From cb3b72b9756d124565ed12e005065ad6f0769568 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 7 Nov 2017 19:40:38 -0800 Subject: Fix base allocator THP auto mode locking and stats. 
Added proper synchronization for switching to using THP in auto mode. Also fixed stats for number of THPs used. --- include/jemalloc/internal/base_structs.h | 2 ++ src/base.c | 37 ++++++++++++++------------------ 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index b542169..2102247 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -30,6 +30,8 @@ struct base_s { /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. Logarithmic spacing between subsequent allocations ensures diff --git a/src/base.c b/src/base.c index e3a89b0..cc3d978 100644 --- a/src/base.c +++ b/src/base.c @@ -139,23 +139,13 @@ base_get_num_blocks(base_t *base, bool with_new_block) { return n_blocks; } -static bool -base_auto_thp_triggered(base_t *base, bool with_new_block) { - assert(opt_metadata_thp == metadata_thp_auto); - - if (base_ind_get(base) != 0) { - return base_get_num_blocks(base, with_new_block) >= - BASE_AUTO_THP_THRESHOLD; - } - - return base_get_num_blocks(base, with_new_block) >= - BASE_AUTO_THP_THRESHOLD_A0; -} - static void -base_auto_thp_switch(base_t *base) { +base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); - + malloc_mutex_assert_owner(tsdn, &base->mtx); + if (base->auto_thp_switched) { + return; + } /* Called when adding a new block. */ bool should_switch; if (base_ind_get(base) != 0) { @@ -169,14 +159,16 @@ base_auto_thp_switch(base_t *base) { return; } - assert(base->n_thp == 0); + base->auto_thp_switched = true; + assert(!config_stats || base->n_thp == 0); /* Make the initial blocks THP lazily. */ base_block_t *block = base->blocks; while (block != NULL) { assert((block->size & HUGEPAGE_MASK) == 0); pages_huge(block, block->size); if (config_stats) { - base->n_thp += block->size >> LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(block->size - + extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -226,7 +218,7 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); if (metadata_thp_madvise() && (opt_metadata_thp == - metadata_thp_always || base_auto_thp_triggered(base, false))) { + metadata_thp_always || base->auto_thp_switched)) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> LG_HUGEPAGE; @@ -289,10 +281,12 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, } else if (opt_metadata_thp == metadata_thp_auto && base != NULL) { /* base != NULL indicates this is not a new base. 
*/ - if (base_auto_thp_triggered(base, true)) { + malloc_mutex_lock(tsdn, &base->mtx); + base_auto_thp_switch(tsdn, base); + if (base->auto_thp_switched) { pages_huge(addr, block_size); } - base_auto_thp_switch(base); + malloc_mutex_unlock(tsdn, &base->mtx); } } @@ -334,7 +328,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->mapped += block->size; if (metadata_thp_madvise() && !(opt_metadata_thp == metadata_thp_auto - && !base_auto_thp_triggered(base, false))) { + && !base->auto_thp_switched)) { assert(base->n_thp > 0); base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE; @@ -376,6 +370,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; + base->auto_thp_switched = false; for (szind_t i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); } -- cgit v0.12 From d6feed6e6631d00806607cfe16a796e337752044 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 13 Oct 2017 10:27:13 -0700 Subject: Use tsd offset_state instead of atomic While working on #852, I noticed the prng state is atomic. This is the only atomic use of prng in all of jemalloc. Instead, use a threadlocal prng state if possible to avoid unnecessary cache line contention. --- include/jemalloc/internal/extent_inlines.h | 13 ++++++++++--- include/jemalloc/internal/tsd.h | 2 ++ src/tsd.c | 10 ++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 610072e..9f5c5cd 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -196,9 +196,16 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { if (alignment < PAGE) { unsigned lg_range = LG_PAGE - lg_floor(CACHELINE_CEILING(alignment)); - size_t r = - prng_lg_range_zu(&extent_arena_get(extent)->offset_state, - lg_range, true); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu( + &extent_arena_get(extent)->offset_state, + lg_range, true); + } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); extent->e_addr = (void *)((uintptr_t)extent->e_addr + diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 155a2ec..0b9841a 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -65,6 +65,7 @@ typedef void (*test_callback_t)(int *); O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ + O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ @@ -84,6 +85,7 @@ typedef void (*test_callback_t)(int *); 0, \ 0, \ 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ diff --git a/src/tsd.c b/src/tsd.c index f968992..c143068 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -71,6 +71,16 @@ tsd_data_init(tsd_t *tsd) { */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + /* + * A nondeterministic seed based on the address of tsd reduces + * the likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. 
+	 */
+	*tsd_offset_statep_get(tsd) = config_debug ? 0 :
+	    (uint64_t)(uintptr_t)tsd;
+
 	return tsd_tcache_enabled_data_init(tsd);
 }
-- cgit v0.12


From 282a3faa1784783e2e2cb3698183927b3927b950 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Mon, 13 Nov 2017 11:41:53 -0800
Subject: Use extent_heap_first for best fit.

extent_heap_any makes the layout less predictable and as a result incurs
more fragmentation.
---
 src/extent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/extent.c b/src/extent.c
index c8a3090..466e0b2 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -370,7 +370,7 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
 	    (size_t)pind);
 	if (i < NPSIZES+1) {
 		assert(!extent_heap_empty(&extents->heaps[i]));
-		extent_t *extent = extent_heap_any(&extents->heaps[i]);
+		extent_t *extent = extent_heap_first(&extents->heaps[i]);
 		assert(extent_size_get(extent) >= size);
 		return extent;
 	}
-- cgit v0.12


From fac706836ffda46759914508b918e8b54c8020c8 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Thu, 9 Nov 2017 13:51:39 -0800
Subject: Add opt.lg_extent_max_active_fit

When allocating from dirty extents (which we always prefer if available),
large active extents can get split even if the new allocation is much
smaller, in which case the fragmentation this introduces causes severe
long-term damage. This new option controls the threshold at which an
existing active extent is reused and split: we avoid using a large extent
for much smaller sizes, in order to reduce fragmentation. In some
workloads, adding the threshold improves virtual memory usage by >10x.
---
 doc/jemalloc.xml.in                        | 16 ++++++++++++++++
 include/jemalloc/internal/extent_externs.h |  8 +++++---
 include/jemalloc/internal/extent_types.h   |  6 ++++++
 src/ctl.c                                  |  4 ++++
 src/extent.c                               |  9 +++++++++
 src/jemalloc.c                             |  3 +++
 test/unit/mallctl.c                        |  1 +
 7 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 895b2d4..3f9ba20 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -1069,6 +1069,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
         for related dynamic control options.
+
+
+        opt.lg_extent_max_active_fit
+        (size_t)
+        r-
+
+        When reusing dirty extents, this determines the (log
+        base 2 of the) maximum ratio between the size of the active extent
+        selected (to split off from) and the size of the requested allocation.
+        This prevents the splitting of large active extents for smaller
+        allocations, which can reduce fragmentation over the long run
+        (especially for non-active extents). A lower value may reduce
+        fragmentation, at the cost of extra active extents. The default value
+        is 6, which gives a maximum ratio of 64 (2^6). 
+ + opt.stats_print diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 132d890..a76d4e4 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -6,9 +6,11 @@ #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; -extern mutex_pool_t extent_mutex_pool; +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 7efcd3a..c0561d9 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -8,4 +8,10 @@ typedef struct extents_s extents_t; #define EXTENT_GROW_MAX_PIND (NPSIZES - 1) +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. + */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 11cd68d..1fdb772 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) @@ -293,6 +294,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, @@ -1597,6 +1599,8 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, + size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) diff --git a/src/extent.c b/src/extent.c index 466e0b2..548a93e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -17,6 +17,8 @@ rtree_t extents_rtree; /* Keyed by the address of the extent_t being protected. */ mutex_pool_t extent_mutex_pool; +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); @@ -369,6 +371,13 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); if (i < NPSIZES+1) { + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. 
+ */ + if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + return NULL; + } assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); diff --git a/src/jemalloc.c b/src/jemalloc.c index f29fc7d..f4fd805 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1146,6 +1146,9 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), yes, yes, false) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 94f801e..4cfd981 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -172,6 +172,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, utrace, utrace); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, always); + TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); -- cgit v0.12 From eb1b08daaea57d16ce720d97847d94cee2f867cc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 14 Nov 2017 16:09:31 -0800 Subject: Fix an extent coalesce bug. When coalescing, we should take both extents off the LRU list; otherwise decay can grab the existing outer extent through extents_evict. --- include/jemalloc/internal/extent_inlines.h | 5 +++++ src/extent.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 9f5c5cd..9b8ddc2 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -356,6 +356,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) { } static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert) { ql_after_insert(to_remove, to_insert, ql_link); diff --git a/src/extent.c b/src/extent.c index 548a93e..8b00ec9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1458,13 +1458,12 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (forward && extents->delay_coalesce) { + if (extents->delay_coalesce) { /* - * The extent that remains after coalescing must occupy the - * outer extent's position in the LRU. For forward coalescing, - * swap the inner extent into the LRU. + * Remove outer from the LRU list so that it won't be show up in + * decay through extents_evict. */ - extent_list_replace(&extents->lru, outer, inner); + extent_list_remove(&extents->lru, outer); } extent_activate_locked(tsdn, arena, extents, outer, extents->delay_coalesce); @@ -1474,9 +1473,16 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &extents->mtx); + if (!err && extents->delay_coalesce) { + if (forward) { + extent_list_prepend(&extents->lru, inner); + } else { + extent_list_prepend(&extents->lru, outer); + } + } if (err) { - if (forward && extents->delay_coalesce) { - extent_list_replace(&extents->lru, inner, outer); + if (extents->delay_coalesce) { + extent_list_prepend(&extents->lru, outer); } extent_deactivate_locked(tsdn, arena, extents, outer, extents->delay_coalesce); -- cgit v0.12 From 3e64dae802b9f7cd4f860b0d29126cd727d5166b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Nov 2017 14:48:55 -0800 Subject: Eagerly coalesce large extents. Coalescing is a small price to pay for large allocations since they happen less frequently. This reduces fragmentation while also potentially improving locality. --- src/extent.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 8b00ec9..23b6401 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1586,8 +1586,22 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); + } else if (extent_size_get(extent) >= LARGE_MINCLASS) { + /* Always coalesce large extents eagerly. */ + bool coalesced; + size_t prev_size; + do { + prev_size = extent_size_get(extent); + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce(tsdn, arena, + r_extent_hooks, rtree_ctx, extents, extent, + &coalesced, growing_retained); + if (coalesced) { + extent_list_remove(&extents->lru, extent); + } + } while (coalesced && + extent_size_get(extent) >= prev_size + LARGE_MINCLASS); } - extent_deactivate_locked(tsdn, arena, extents, extent, false); malloc_mutex_unlock(tsdn, &extents->mtx); -- cgit v0.12 From e475d03752d53e198143fdf58e7d0e2e14e5f1a2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 16 Nov 2017 14:27:23 -0800 Subject: Avoid setting zero and commit if split fails in extent_recycle. --- src/extent.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/extent.c b/src/extent.c index 23b6401..7c7da29 100644 --- a/src/extent.c +++ b/src/extent.c @@ -791,7 +791,7 @@ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool *zero, bool *commit, bool growing_retained) { + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(alignment > 0); @@ -849,13 +849,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_activate_locked(tsdn, arena, extents, extent, false); malloc_mutex_unlock(tsdn, &extents->mtx); - if (extent_zeroed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - return extent; } @@ -1021,16 +1014,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, - &committed, growing_retained); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, + growing_retained); if (extent == NULL) { return NULL; } - if (committed) { - *commit = true; - } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent, @@ -1049,6 +1038,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_zeroed_set(extent, true); } + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } -- cgit v0.12 From 26a8f82c484eada4188e56daad32ed6a16b4b585 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sun, 19 Nov 2017 17:01:53 -0800 Subject: Add missing deregister before extents_leak. This fixes an regression introduced by 211b1f3 (refactor extent split). --- src/extent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extent.c b/src/extent.c index 7c7da29..ee50aff 100644 --- a/src/extent.c +++ b/src/extent.c @@ -988,6 +988,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { + extent_deregister(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); } -- cgit v0.12 From 6e841f618a5ff99001a9578e9ff73602e7a94620 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Nov 2017 12:21:58 -0800 Subject: Add more tests for extent hooks failure paths. 
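
Background for these tests: a user-installed extent hook signals failure by returning true, and jemalloc must recover cleanly when that happens. As an illustrative sketch only (not part of this diff), a split hook that always refuses might look like:

    #include <stdbool.h>
    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    static bool
    split_hook_always_fail(extent_hooks_t *extent_hooks, void *addr,
        size_t size, size_t size_a, size_t size_b, bool committed,
        unsigned arena_ind) {
    	/* Refuse the split; jemalloc must fall back without crashing. */
    	return true;
    }

The try_split/try_merge/try_purge_* toggles below let the shared test hooks simulate exactly this kind of refusal.
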
--- src/extent.c | 3 +++ test/include/test/extent_hooks.h | 2 ++ test/integration/extent.c | 23 ++++++++++++++++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index ee50aff..7e10b7f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -988,9 +988,12 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); extent_deregister(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) + == NULL); } return NULL; } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index ea01285..1f06201 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -266,6 +266,8 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->merge, extent_merge_hook, "Wrong hook function"); + assert_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, + "Extents not mergeable"); called_merge = true; if (!try_merge) { return true; diff --git a/test/integration/extent.c b/test/integration/extent.c index 1dcf217..7100b6a 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -98,7 +98,8 @@ test_extent_body(unsigned arena_ind) { dallocx(p, flags); } -TEST_BEGIN(test_extent_manual_hook) { +static void +test_manual_hook_body(void) { unsigned arena_ind; size_t old_size, new_size, sz; size_t hooks_mib[3]; @@ -139,8 +140,9 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - test_skip_if(check_background_thread_enabled()); - test_extent_body(arena_ind); + if (check_background_thread_enabled()) { + test_extent_body(arena_ind); + } /* Restore extent hooks. */ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, @@ -165,6 +167,21 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_eq(old_hooks->merge, default_hooks->merge, "Unexpected extent_hooks error"); } + +TEST_BEGIN(test_extent_manual_hook) { + test_manual_hook_body(); + + /* Test failure paths. */ + try_split = false; + test_manual_hook_body(); + try_merge = false; + test_manual_hook_body(); + try_purge_lazy = false; + try_purge_forced = false; + test_manual_hook_body(); + + try_split = try_merge = try_purge_lazy = try_purge_forced = true; +} TEST_END TEST_BEGIN(test_extent_auto_hook) { -- cgit v0.12 From b5ab3f91ea60b16819563b09aa01a0d339aa40b4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 13:43:21 -0800 Subject: Fix test/integration/extent. Should only run the hook tests without background threads. This was introduced in 6e841f6. --- test/integration/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/extent.c b/test/integration/extent.c index 7100b6a..c2dc1cb 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -140,7 +140,7 @@ test_manual_hook_body(void) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - if (check_background_thread_enabled()) { + if (!check_background_thread_enabled()) { test_extent_body(arena_ind); } -- cgit v0.12 From 955b1d9cc574647d3d3dfb474b47b51b3a81453d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 15:06:08 -0800 Subject: Fix extent deregister on the leak path. On leak path we should not adjust gdump when deregister. 
--- src/extent.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index 7e10b7f..c531da2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -761,7 +761,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -778,11 +778,21 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_unlock(tsdn, extent); - if (config_prof) { + if (config_prof && gdump) { extent_gdump_sub(tsdn, extent); } } +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + /* * Tries to find and remove an extent from extents that can be used for the * given allocation request. @@ -989,7 +999,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, } if (to_leak != NULL) { void *leak = extent_base_get(to_leak); - extent_deregister(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) @@ -1267,7 +1277,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, */ assert(result == extent_split_interior_error); if (to_leak != NULL) { - extent_deregister(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, to_leak, true); goto label_err; -- cgit v0.12 From 749caf14ae73a9ab1c48e538a8af09addbb35ee7 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Sun, 3 Dec 2017 21:45:08 +0100 Subject: Also use __riscv to detect builds for RISC-V CPUs. According to the RISC-V toolchain conventions, __riscv__ is the old spelling of this definition. __riscv should be used going forward. 
https://github.com/riscv/riscv-toolchain-conventions#cc-preprocessor-definitions --- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 6b987d6..1b750b1 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -94,7 +94,7 @@ typedef int malloc_cpuid_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __riscv__ +# if defined(__riscv) || defined(__riscv__) # define LG_QUANTUM 4 # endif # ifdef __s390__ -- cgit v0.12 From 22460cbebd2b7343319d9a8425f593c92facacab Mon Sep 17 00:00:00 2001 From: nicolov Date: Sun, 10 Dec 2017 23:36:32 -0800 Subject: jemalloc_mangle.sh: set sh in strict mode --- include/jemalloc/jemalloc_mangle.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_mangle.sh b/include/jemalloc/jemalloc_mangle.sh index df328b7..c675bb4 100755 --- a/include/jemalloc/jemalloc_mangle.sh +++ b/include/jemalloc/jemalloc_mangle.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/sh -eu public_symbols_txt=$1 symbol_prefix=$2 -- cgit v0.12 From 5e0332890f8e553e148b8c4b0130d84037339e6a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 14 Dec 2017 11:14:08 -0800 Subject: Output opt.lg_extent_max_active_fit in stats. --- src/stats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats.c b/src/stats.c index 0847f39..33e4426 100644 --- a/src/stats.c +++ b/src/stats.c @@ -822,6 +822,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") + OPT_WRITE_UNSIGNED(lg_extent_max_active_fit, ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") OPT_WRITE_BOOL(utrace, ",") @@ -856,7 +857,9 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_BOOL #undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_UNSIGNED #undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_SSIZE_T_MUTABLE #undef OPT_WRITE_CHAR_P /* arenas. */ -- cgit v0.12 From f70785de91ee14e8034f9bd64bf6590199c89e65 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Dec 2017 14:04:07 -0800 Subject: Skip test/unit/pack when profiling is enabled. The test assumes no sampled allocations. --- test/unit/pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/pack.c b/test/unit/pack.c index edfc548..fc188b0 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -88,6 +88,12 @@ arena_reset_mallctl(unsigned arena_ind) { } TEST_BEGIN(test_pack) { + bool prof_enabled; + size_t sz = sizeof(prof_enabled); + if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) { + test_skip_if(prof_enabled); + } + unsigned arena_ind = arenas_create_mallctl(); size_t nregs_per_run = nregs_per_run_compute(); size_t nregs = nregs_per_run * NSLABS; -- cgit v0.12 From 740bdd68b1d4b9c39c68432e06deb70ad4da3210 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 12:13:50 -0800 Subject: Over purge by 1 extent always. When purging, large allocations are usually the ones that cross the npages_limit threshold, simply because they are "large". This means we often leave the large extent around for a while, which has the downsides of: 1) high RSS and 2) more chance of them getting fragmented. 
Given that they are not likely to be reused very soon (LRU), let's over purge by 1 extent (which is often large and not reused frequently). --- include/jemalloc/internal/extent_externs.h | 3 +-- src/arena.c | 4 ++-- src/extent.c | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index a76d4e4..b8a4d02 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -38,8 +38,7 @@ extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min, - size_t npages_max); + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); diff --git a/src/arena.c b/src/arena.c index e2462bf..a28dbfb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -912,7 +912,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_t *extent; while (nstashed < npages_decay_max && (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - npages_limit, npages_decay_max - nstashed)) != NULL) { + npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -1226,7 +1226,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0, SIZE_MAX)) != NULL) { + &arena->extents_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } diff --git a/src/extent.c b/src/extent.c index c531da2..bca703f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -481,7 +481,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min, size_t npages_max) { + extents_t *extents, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -499,11 +499,9 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, goto label_return; } /* Check the eviction limit. */ - size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min || - npages > npages_max) { + if (extents_npages <= npages_min) { extent = NULL; goto label_return; } -- cgit v0.12 From 4bf4a1c4ea418ba490d35d23aee0f535e96ddd23 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 17:22:06 -0700 Subject: Pull out arena_bin_info_t and arena_bin_t into their own file. In the process, kill arena_bin_index, which is unused. To follow are several diffs continuing this separation. 
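
The renames involved are mechanical and recur throughout the rest of the series; in summary:

    arena_bin_t       -> bin_t
    arena_bin_info_t  -> bin_info_t
    arena_bin_info[]  -> bin_infos[]

so a call site such as arena_bin_info[binind].reg_size becomes bin_infos[binind].reg_size, with the definitions now coming from the new "jemalloc/internal/bin.h" header.
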
--- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 7 +-- include/jemalloc/internal/arena_inlines_b.h | 7 --- include/jemalloc/internal/arena_structs_b.h | 65 +------------------ include/jemalloc/internal/arena_types.h | 2 - include/jemalloc/internal/bin.h | 81 ++++++++++++++++++++++++ include/jemalloc/internal/extent_structs.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 10 +-- src/arena.c | 96 ++++++++++++----------------- src/bin.c | 21 +++++++ src/ctl.c | 8 +-- src/tcache.c | 12 ++-- test/unit/junk.c | 2 +- test/unit/mallctl.c | 6 +- test/unit/slab.c | 2 +- test/unit/stats.c | 2 +- 16 files changed, 169 insertions(+), 155 deletions(-) create mode 100644 include/jemalloc/internal/bin.h create mode 100644 src/bin.c diff --git a/Makefile.in b/Makefile.in index 0698633..2f0b3b2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -93,6 +93,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ + $(srcroot)src/bin.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 5a0e3ad..77a2b54 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" @@ -9,8 +10,6 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; - extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; @@ -51,10 +50,10 @@ void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, +void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 003abe1..7b10d9e 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,13 +8,6 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" -static inline szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return binind; -} - JEMALLOC_ALWAYS_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index f74ea97..d843b09 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" #include 
"jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" @@ -13,42 +14,6 @@ #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; @@ -109,32 +74,6 @@ struct arena_decay_s { uint64_t ceil_npages; }; -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. - */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* List used to track full slabs. */ - extent_list_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -264,7 +203,7 @@ struct arena_s { * * Synchronization: internal. */ - arena_bin_t bins[NBINS]; + bin_t bins[NBINS]; /* * Base allocator, from which arena metadata are allocated. diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index a691bd8..70001b5 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,9 +12,7 @@ #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h new file mode 100644 index 0000000..09717b1 --- /dev/null +++ b/include/jemalloc/internal/bin.h @@ -0,0 +1,81 @@ +#ifndef JEMALLOC_INTERNAL_BIN_H +#define JEMALLOC_INTERNAL_BIN_H + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/stats.h" + +/* + * A bin contains a set of extents that are currently being used for slab + * allocations. 
+ */ + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern const bin_info_t bin_infos[NBINS]; + + +typedef struct bin_s bin_t; +struct bin_s { + /* All operations on bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 722963b..89b49c7 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -143,7 +143,7 @@ struct extent_s { /* * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full + * - bin_t's slabs_full * - extents_t's LRU * - stashed dirty extents * - arena's large allocations diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index d1632d8..14ab037 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/sz.h" @@ -76,16 +77,15 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (likely(!zero)) { if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + arena_alloc_junk_small(ret, &bin_infos[binind], + false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); } @@ -169,7 +169,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, 
&arena_bin_info[binind]); + arena_dalloc_junk_small(ptr, &bin_infos[binind]); } bin = tcache_small_bin_get(tcache, binind); diff --git a/src/arena.c b/src/arena.c index a28dbfb..2dcb447 100644 --- a/src/arena.c +++ b/src/arena.c @@ -32,21 +32,6 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); @@ -385,8 +370,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * -arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) { +arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, const bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -413,7 +397,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)arena_bin_info[binind].reg_size == 0); + (uintptr_t)bin_infos[binind].reg_size == 0); /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); @@ -434,7 +418,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { default: not_reached(); } - assert(regind < arena_bin_info[binind].nregs); + assert(regind < bin_infos[binind].nregs); return regind; } @@ -443,7 +427,7 @@ static void arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); assert(extent_nfree_get(slab) < bin_info->nregs); @@ -1089,18 +1073,18 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { +arena_bin_slabs_nonfull_tryget(bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; @@ -1112,7 +1096,7 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed @@ -1126,7 +1110,7 @@ arena_bin_slabs_full_insert(arena_t *arena, 
arena_bin_t *bin, extent_t *slab) { } static void -arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { if (arena_is_auto(arena)) { return; } @@ -1180,7 +1164,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < NBINS; i++) { extent_t *slab; - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { slab = bin->slabcur; @@ -1269,7 +1253,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1292,7 +1276,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) { + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1328,10 +1312,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { extent_t *slab; - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); @@ -1340,7 +1324,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } /* No existing slabs have any space available. */ - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); @@ -1371,12 +1355,12 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; if (!arena_is_auto(arena) && bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; @@ -1429,7 +1413,7 @@ void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; - arena_bin_t *bin; + bin_t *bin; assert(tbin->ncached == 0); @@ -1445,7 +1429,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { ptr = arena_slab_reg_alloc(tsdn, slab, - &arena_bin_info[binind]); + &bin_infos[binind]); } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1462,8 +1446,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, break; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ptr, &bin_infos[binind], true); } /* Insert such that low regions get used first. 
*/ *(tbin->avail - nfill + i) = ptr; @@ -1481,14 +1464,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { +arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } } static void -arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = @@ -1497,7 +1480,7 @@ arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; - arena_bin_t *bin; + bin_t *bin; size_t usize; extent_t *slab; @@ -1507,7 +1490,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); + ret = arena_slab_reg_alloc(tsdn, slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1531,14 +1514,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + &bin_infos[binind], false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); @@ -1643,13 +1626,13 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { /* Dissociate slab from bin. 
*/ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; /* * The following block's conditional is necessary because if the @@ -1666,7 +1649,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1680,8 +1663,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1711,8 +1693,8 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); szind_t binind = extent_szind_get(slab); - arena_bin_t *bin = &arena->bins[binind]; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + bin_t *bin = &arena->bins[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); @@ -1743,7 +1725,7 @@ arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); @@ -1777,7 +1759,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * Avoid moving the allocation if the size class can be left the * same. */ - assert(arena_bin_info[sz_size2index(oldsize)].reg_size == + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < @@ -2060,7 +2042,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/bin.c b/src/bin.c new file mode 100644 index 0000000..59cdd2c --- /dev/null +++ b/src/bin.c @@ -0,0 +1,21 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin.h" + +const bin_info_t bin_infos[NBINS] = { +#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ + {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, +#define BIN_INFO_bin_no(reg_size, slab_size, nregs) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + BIN_INFO_bin_##bin((1U< NBINS) { @@ -2680,7 +2680,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/tcache.c b/src/tcache.c index e22f806..6d51673 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -121,7 +121,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); - arena_bin_t *bin = &bin_arena->bins[binind]; + bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -169,7 +169,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -533,7 +533,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; @@ -674,13 +674,13 @@ tcache_boot(tsdn_t *tsdn) { stack_nelms = 0; unsigned i; for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((arena_bin_info[i].nregs << 1) <= + } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); + (bin_infos[i].nregs << 1); } else { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; diff --git a/test/unit/junk.c b/test/unit/junk.c index fd0e65b..243ced4 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -15,7 +15,7 @@ watch_junking(void *p) { } static void -arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { size_t i; arena_dalloc_junk_small_orig(ptr, bin_info); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4cfd981..e812b52 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -696,10 +696,10 @@ TEST_BEGIN(test_arenas_bin_constants) { assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_BIN_CONSTANT(size_t, size, arena_bin_info[0].reg_size); - TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, arena_bin_info[0].nregs); + TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); + TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs); TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, - arena_bin_info[0].slab_size); + bin_infos[0].slab_size); #undef TEST_ARENAS_BIN_CONSTANT } diff --git a/test/unit/slab.c b/test/unit/slab.c index ea344f8..7e662ae 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_arena_slab_regind) { for (binind = 0; binind < NBINS; binind++) { size_t regind; extent_t slab; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true); diff --git a/test/unit/stats.c b/test/unit/stats.c index d9849d8..231010e 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -245,7 +245,7 @@ TEST_BEGIN(test_stats_arenas_bins) { (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); - p = malloc(arena_bin_info[0].reg_size); + p = malloc(bin_infos[0].reg_size); assert_ptr_not_null(p, "Unexpected malloc() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), -- cgit v0.12 From a8dd8876fb483f402833fa05f0fb46fe7c5416e1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:02:39 -0700 Subject: Move bin initialization from arena module to bin module. 
--- include/jemalloc/internal/bin.h | 3 +++ include/jemalloc/internal/witness.h | 2 +- src/arena.c | 11 ++--------- src/bin.c | 16 +++++++++++++++- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 09717b1..d792722 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -78,4 +78,7 @@ struct bin_s { malloc_bin_stats_t stats; }; +/* Returns true on error. */ +bool bin_init(bin_t *bin); + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 33be666..7ace8ae 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -51,7 +51,7 @@ #define WITNESS_RANK_ARENA_LARGE 19U #define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 2dcb447..0d27ffb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2042,17 +2042,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { + bool err = bin_init(&arena->bins[i]); + if (err) { goto label_error; } - bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); - if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); - } } arena->base = base; diff --git a/src/bin.c b/src/bin.c index 59cdd2c..89b041d 100644 --- a/src/bin.c +++ b/src/bin.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/witness.h" const bin_info_t bin_infos[NBINS] = { #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ @@ -18,4 +19,17 @@ const bin_info_t bin_infos[NBINS] = { #undef SC }; - +bool +bin_init(bin_t *bin) { + if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_BIN, + malloc_mutex_rank_exclusive)) { + return true; + } + bin->slabcur = NULL; + extent_heap_new(&bin->slabs_nonfull); + extent_list_init(&bin->slabs_full); + if (config_stats) { + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + } + return false; +} -- cgit v0.12 From 48bb4a056be97214fa049f21bead9618429c807a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:10:36 -0700 Subject: Move bin forking code from arena to bin module. --- include/jemalloc/internal/bin.h | 3 +++ src/arena.c | 6 +++--- src/bin.c | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index d792722..4e55166 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -80,5 +80,8 @@ struct bin_s { /* Returns true on error. 
*/ bool bin_init(bin_t *bin); +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/src/arena.c b/src/arena.c index 0d27ffb..a5f2449 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2126,7 +2126,7 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < NBINS; i++) { - malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + bin_prefork(tsdn, &arena->bins[i]); } } @@ -2135,7 +2135,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + bin_postfork_parent(tsdn, &arena->bins[i]); } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2179,7 +2179,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + bin_postfork_child(tsdn, &arena->bins[i]); } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/src/bin.c b/src/bin.c index 89b041d..931a76e 100644 --- a/src/bin.c +++ b/src/bin.c @@ -21,7 +21,7 @@ const bin_info_t bin_infos[NBINS] = { bool bin_init(bin_t *bin) { - if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_BIN, + if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; } @@ -33,3 +33,18 @@ bin_init(bin_t *bin) { } return false; } + +void +bin_prefork(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_prefork(tsdn, &bin->lock); +} + +void +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_parent(tsdn, &bin->lock); +} + +void +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_child(tsdn, &bin->lock); +} -- cgit v0.12 From 8aafa270fd56c36db374fa9f294217fa80151b3d Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:27:40 -0700 Subject: Move bin stats code from arena to bin module. --- include/jemalloc/internal/bin.h | 21 ++++++++++++++++++++- include/jemalloc/internal/stats.h | 6 ++++++ src/arena.c | 15 +-------------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 4e55166..89572fa 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -78,10 +78,29 @@ struct bin_s { malloc_bin_stats_t stats; }; -/* Returns true on error. */ +/* Initializes a bin to empty. Returns true on error. */ bool bin_init(bin_t *bin); + +/* Forking. */ void bin_prefork(tsdn_t *tsdn, bin_t *bin); void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +/* Stats. 
*/ +static inline void +bin_stats_merge(tsdn_t *tsdn, malloc_bin_stats_t *dst_bin_stats, bin_t *bin) { + malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + dst_bin_stats->nmalloc += bin->stats.nmalloc; + dst_bin_stats->ndalloc += bin->stats.ndalloc; + dst_bin_stats->nrequests += bin->stats.nrequests; + dst_bin_stats->curregs += bin->stats.curregs; + dst_bin_stats->nfills += bin->stats.nfills; + dst_bin_stats->nflushes += bin->stats.nflushes; + dst_bin_stats->nslabs += bin->stats.nslabs; + dst_bin_stats->reslabs += bin->stats.reslabs; + dst_bin_stats->curslabs += bin->stats.curslabs; + malloc_mutex_unlock(tsdn, &bin->lock); +} + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index f19df37..1da5b02 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -6,6 +6,12 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" +/* + * The synchronization for stats counters may piggyback on existing + * synchronization in the associated data. Therefore, the merging functions for + * a module's stats will lie in the module, instead of with the stats. + */ + /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ diff --git a/src/arena.c b/src/arena.c index a5f2449..c02dff1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -337,20 +337,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_subtract(&astats->uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); } } -- cgit v0.12 From 901d94a2b06df09c960836901f6a81a0d3d00732 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:54:25 -0700 Subject: Rename cache_alloc_easy to cache_bin_alloc_easy. This lives in the cache_bin module; just a typo. 
--- include/jemalloc/internal/cache_bin.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/tcache.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 9b87439..12f3ef2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -85,7 +85,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_alloc_easy(cache_bin_t *bin, bool *success) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { void *ret; if (unlikely(bin->ncached == 0)) { diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 14ab037..0a6feb5 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind < NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/tcache.c b/src/tcache.c index 6d51673..a769a6b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -95,7 +95,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = cache_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success); return ret; } -- cgit v0.12 From 7f1b02e3fa9de7e0bb5e2562994b5ab3b82c0ec3 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 4 Nov 2017 12:50:19 -0700 Subject: Split up and standardize naming of stats code. The arena-associated stats are now all prefixed with arena_stats_, and live in their own file. Likewise, malloc_bin_stats_t -> bin_stats_t, also in its own file. 
--- include/jemalloc/internal/arena_externs.h | 6 +- include/jemalloc/internal/arena_stats.h | 237 ++++++++++++++++++++++++++++ include/jemalloc/internal/arena_structs_b.h | 4 +- include/jemalloc/internal/bin.h | 6 +- include/jemalloc/internal/bin_stats.h | 51 ++++++ include/jemalloc/internal/ctl.h | 4 +- include/jemalloc/internal/stats.h | 140 ---------------- src/arena.c | 143 +---------------- src/bin.c | 2 +- src/ctl.c | 82 +++++----- 10 files changed, 342 insertions(+), 333 deletions(-) create mode 100644 include/jemalloc/internal/arena_stats.h create mode 100644 include/jemalloc/internal/bin_stats.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 77a2b54..4b3732b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,17 +16,13 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; -void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests); -void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - size_t size); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); + bin_stats_t *bstats, arena_stats_large_t *lstats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h new file mode 100644 index 0000000..837d4eb --- /dev/null +++ b/include/jemalloc/internal/arena_stats.h @@ -0,0 +1,237 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H +#define JEMALLOC_INTERNAL_ARENA_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/size_classes.h" + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + +typedef struct arena_stats_large_s arena_stats_large_t; +struct arena_stats_large_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + arena_stats_u64_t nrequests; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +}; + +typedef struct arena_stats_decay_s arena_stats_decay_t; +struct arena_stats_decay_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. 
*/ + arena_stats_u64_t purged; +}; + +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + atomic_zu_t retained; /* Derived. */ + + arena_stats_decay_t decay_dirty; + arena_stats_decay_t decay_muzzy; + + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; + + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ + + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; + + /* One element for each large size class. */ + arena_stats_large_t lstats[NSIZES - NBINS]; + + /* Arena uptime. */ + nstime_t uptime; +}; + +static inline bool +arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[i] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. */ + return false; +} + +static inline void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static inline void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static inline uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static inline void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +UNUSED static inline void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; + assert(*p + x >= *p); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). 
+ */ +static inline void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + +static inline size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return atomic_load_zu(p, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. */ +static inline void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + +static inline void +arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - + NBINS].nrequests, nrequests); + arena_stats_unlock(tsdn, arena_stats); +} + +static inline void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + + +#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index d843b09..38bc959 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" @@ -11,7 +12,6 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" struct arena_decay_s { @@ -69,7 +69,7 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - decay_stats_t *stats; + arena_stats_decay_t *stats; /* Peak number of pages in associated extents. Used for debug only. 
*/ uint64_t ceil_npages; }; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 89572fa..9b416ad 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/bin_stats.h" /* * A bin contains a set of extents that are currently being used for slab @@ -75,7 +75,7 @@ struct bin_s { extent_list_t slabs_full; /* Bin statistics. */ - malloc_bin_stats_t stats; + bin_stats_t stats; }; /* Initializes a bin to empty. Returns true on error. */ @@ -88,7 +88,7 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. */ static inline void -bin_stats_merge(tsdn_t *tsdn, malloc_bin_stats_t *dst_bin_stats, bin_t *bin) { +bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); dst_bin_stats->nmalloc += bin->stats.nmalloc; diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h new file mode 100644 index 0000000..86e673e --- /dev/null +++ b/include/jemalloc/internal/bin_stats.h @@ -0,0 +1,51 @@ +#ifndef JEMALLOC_INTERNAL_BIN_STATS_H +#define JEMALLOC_INTERNAL_BIN_STATS_H + +#include "jemalloc/internal/mutex_prof.h" + +typedef struct bin_stats_s bin_stats_t; +struct bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. 
*/ + size_t curslabs; + + mutex_prof_data_t mutex_data; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a36feaf..d927d94 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -40,8 +40,8 @@ typedef struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; + bin_stats_t bstats[NBINS]; + arena_stats_large_t lstats[NSIZES - NBINS]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 1da5b02..852e342 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,17 +1,6 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" - -/* - * The synchronization for stats counters may piggyback on existing - * synchronization in the associated data. Therefore, the merging functions for - * a module's stats will lie in the module, instead of with the stats. - */ - /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ @@ -38,133 +27,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1]; void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. - */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - -typedef struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; - - /* - * Total number of slabs reused by extracting them from the slabs heap - * for this bin's size class. - */ - uint64_t reslabs; - - /* Current number of slabs in this bin. */ - size_t curslabs; - - mutex_prof_data_t mutex_data; -} malloc_bin_stats_t; - -typedef struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - arena_stats_u64_t nrequests; /* Partially derived. 
*/ - - /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ -} malloc_large_stats_t; - -typedef struct decay_stats_s { - /* Total number of purge sweeps. */ - arena_stats_u64_t npurge; - /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; - /* Total number of pages purged. */ - arena_stats_u64_t purged; -} decay_stats_t; - -/* - * Arena stats. Note that fields marked "derived" are not directly maintained - * within the arena code; rather their values are derived during stats merge - * requests. - */ -typedef struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif - - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - atomic_zu_t retained; /* Derived. */ - - decay_stats_t decay_dirty; - decay_stats_t decay_muzzy; - - atomic_zu_t base; /* Derived. */ - atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - atomic_zu_t metadata_thp; - - atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ - - /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ - - mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; - - /* One element for each large size class. */ - malloc_large_stats_t lstats[NSIZES - NBINS]; - - /* Arena uptime. */ - nstime_t uptime; -} arena_stats_t; - #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/src/arena.c b/src/arena.c index c02dff1..e3693d5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -57,145 +57,6 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, /******************************************************************************/ -static bool -arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { - if (config_debug) { - for (size_t i = 0; i < sizeof(arena_stats_t); i++) { - assert(((char *)arena_stats)[i] == 0); - } - } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { - return true; - } -#endif - /* Memory is zeroed, so there is no need to clear stats. 
*/ - return false; -} - -static void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); - arena_stats_unlock(tsdn, arena_stats); -} - -void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); -} - void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, @@ -213,7 +74,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -729,7 +590,7 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { static bool arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, - decay_stats_t *stats) { + arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); diff --git a/src/bin.c b/src/bin.c index 931a76e..0886bc4 100644 --- a/src/bin.c +++ b/src/bin.c @@ -29,7 +29,7 @@ bin_init(bin_t *bin) { extent_heap_new(&bin->slabs_nonfull); extent_list_init(&bin->slabs_full); if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + memset(&bin->stats, 0, sizeof(bin_stats_t)); } return false; } diff --git a/src/ctl.c b/src/ctl.c index aae8b6e..3a22423 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -560,7 +560,7 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { #ifdef JEMALLOC_ATOMIC_U64 uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); @@ -572,7 +572,7 @@ accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { /* Likewise: with ctl mutex synchronization, reading is simple. 
*/ static uint64_t -arena_stats_read_u64(arena_stats_u64_t *p) { +ctl_arena_stats_read_u64(arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(p, ATOMIC_RELAXED); #else @@ -580,7 +580,8 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } -static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +static void +accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -690,9 +691,9 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; memset(ctl_arena->astats->bstats, 0, NBINS * - sizeof(malloc_bin_stats_t)); + sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * - sizeof(malloc_large_stats_t)); + sizeof(arena_stats_large_t)); } } @@ -755,18 +756,18 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.retained); } - accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -806,11 +807,11 @@ MUTEX_PROF_ARENA_MUTEXES assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); accum_atomic_zu(&sdstats->astats.tcache_bytes, @@ -847,11 +848,11 @@ MUTEX_PROF_ARENA_MUTEXES } for (i = 0; i < NSIZES - NBINS; i++) { - accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -2545,24 +2546,24 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), - uint64_t) + ctl_arena_stats_read_u64( + 
&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), @@ -2592,14 +2593,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) +/* + * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. + */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) /* Intentional. */ + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2717,14 +2721,14 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) -- cgit v0.12 From 21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Dec 2017 17:30:50 -0800 Subject: Add the div module, which allows fast division by dynamic values. 
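The underlying trick, as a standalone sketch (the divisor value and the demo program are illustrative only, not part of the change): for a divisor d with 1 < d < 2^32, precompute magic = ceil(2^32 / d) once; then for any exact multiple n = i * d with n < 2^32, the quotient is recovered with one multiply and one shift, i = (n * magic) >> 32.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch of the reciprocal-multiply trick; the divisor chosen
 * here is arbitrary and this demo program is not part of the patch itself.
 */
int
main(void) {
	uint64_t d = 48;	/* example divisor, e.g. a slab region size */
	/* magic = ceil(2^32 / d), computed once per divisor. */
	uint32_t magic = (uint32_t)((((uint64_t)1 << 32) + d - 1) / d);

	for (uint64_t i = 0; i < 100000; i++) {
		uint64_t n = i * d;	/* exact multiple, well below 2^32 */
		/* One multiply and one shift recover the quotient exactly. */
		uint64_t q = (n * (uint64_t)magic) >> 32;
		assert(q == i);
	}
	printf("magic for d=%llu is %lu\n", (unsigned long long)d,
	    (unsigned long)magic);
	return 0;
}

The follow-up commit uses this machinery to compute slab region indices without a hardware divide and without the generated per-size-class switch.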
--- Makefile.in | 2 ++ include/jemalloc/internal/div.h | 41 ++++++++++++++++++++++++++++++ src/div.c | 55 +++++++++++++++++++++++++++++++++++++++++ src/sz.c | 3 ++- test/unit/div.c | 29 ++++++++++++++++++++++ 5 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/div.h create mode 100644 src/div.c create mode 100644 test/unit/div.c diff --git a/Makefile.in b/Makefile.in index 2f0b3b2..96c4ae0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ + $(srcroot)src/div.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ @@ -165,6 +166,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/div.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h new file mode 100644 index 0000000..aebae93 --- /dev/null +++ b/include/jemalloc/internal/div.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; +#ifdef JEMALLOC_DEBUG + size_t d; +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); + /* + * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/src/div.c b/src/div.c new file mode 100644 index 0000000..808892a --- /dev/null +++ b/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). + * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. 
*/ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. + */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/src/sz.c b/src/sz.c index 0986615..9de77e4 100644 --- a/src/sz.c +++ b/src/sz.c @@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t sz_size2index_tab[] = { #if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" +/* The div module doesn't support division by 1. */ +#error "Unsupported LG_TINY_MIN" #define S2B_0(i) i, #elif LG_TINY_MIN == 1 #warning "Dangerous LG_TINY_MIN" diff --git a/test/unit/div.c b/test/unit/div.c new file mode 100644 index 0000000..b47f10b --- /dev/null +++ b/test/unit/div.c @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/div.h" + +TEST_BEGIN(test_div_exhaustive) { + for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) { + div_info_t div_info; + div_init(&div_info, divisor); + size_t max = 1000 * divisor; + if (max < 1000 * 1000) { + max = 1000 * 1000; + } + for (size_t dividend = 0; dividend < 1000 * divisor; + dividend += divisor) { + size_t quotient = div_compute( + &div_info, dividend); + assert_zu_eq(dividend, quotient * divisor, + "With divisor = %zu, dividend = %zu, " + "got quotient %zu", divisor, dividend, quotient); + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_div_exhaustive); +} -- cgit v0.12 From d41b19f9c70c9dd8244e0879c7aef7943a34c750 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 20 Dec 2017 17:21:56 -0800 Subject: Implement arena regind computation using div_info_t. This eliminates the need to generate an enormous switch statement in arena_slab_regind. --- src/arena.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/arena.c b/src/arena.c index e3693d5..40ef143 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -39,6 +40,8 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; +static div_info_t arena_binind_div_info[NBINS]; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -247,24 +250,10 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % (uintptr_t)bin_infos[binind].reg_size == 0); - /* Avoid doing division with a variable divisor. 
*/ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - switch (binind) { -#define REGIND_bin_yes(index, reg_size) \ - case index: \ - regind = diff / (reg_size); \ - assert(diff == regind * (reg_size)); \ - break; -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U< Date: Sat, 30 Dec 2017 14:31:34 -0800 Subject: handle 32 bit mutex counters --- include/jemalloc/internal/mutex_prof.h | 31 +++++++++---- src/stats.c | 83 +++++++++++++++++++--------------- 2 files changed, 69 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 3358bcf..735c0ad 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -35,21 +35,34 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; -#define MUTEX_PROF_COUNTERS \ +#define MUTEX_PROF_UINT64_COUNTERS \ OP(num_ops, uint64_t) \ OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) \ + OP(num_spin_acq, uint64_t) \ + OP(num_owner_switch, uint64_t) \ + OP(total_wait_time, uint64_t) \ + OP(max_wait_time, uint64_t) + +#define MUTEX_PROF_UINT32_COUNTERS \ OP(max_num_thds, uint32_t) -typedef enum { +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ + MUTEX_PROF_UINT32_COUNTERS + #define OP(counter, type) mutex_counter_##counter, - MUTEX_PROF_COUNTERS + +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; + +COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) +COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) + +#undef COUNTER_ENUM #undef OP - mutex_prof_num_counters -} mutex_prof_counter_ind_t; typedef struct { /* diff --git a/src/stats.c b/src/stats.c index 33e4426..0a89b4b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,34 +85,38 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, static void read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t results_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.bins.0","mutex", #c); \ CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results[mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + (t *)&results_##t[mutex_counter_##c], t); + MUTEX_PROF_COUNTERS #undef OP } static void mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], + const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t stats_uint32_t[mutex_prof_num_uint32_t_counters], const char *json_indent, bool last) { malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - mutex_prof_counter_ind_t k = 0; + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", "%s\t\"%s\": %"FMTu64"%s\n"}; #define OP(c, t) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == mutex_prof_num_counters) ? 
"" : ","); -MUTEX_PROF_COUNTERS + json_indent, #c, (t)stats_##t[mutex_counter_##c], \ + (++k_##t && k_uint32_t == mutex_prof_num_uint32_t_counters) ? "" : ","); + MUTEX_PROF_COUNTERS #undef OP - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, + +malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, last ? "" : ","); } @@ -192,10 +196,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nmalloc, ndalloc, curregs, nrequests, nfills, nflushes, nreslabs, curslabs, mutex ? "," : ""); if (mutex) { - uint64_t mutex_stats[mutex_prof_num_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats); + uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; + read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats, "\t\t\t\t\t\t", true); + "mutex", mutex_stats_64, mutex_stats_32, "\t\t\t\t\t\t", true); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t}%s\n", @@ -222,9 +227,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, not_reached(); } } - uint64_t mutex_stats[mutex_prof_num_counters]; + uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats); + read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); } malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" @@ -239,14 +245,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 " %14"FMTu64" %14"FMTu64" %12"FMTu64 - " %10"FMTu64"\n", - mutex_stats[mutex_counter_num_ops], - mutex_stats[mutex_counter_num_wait], - mutex_stats[mutex_counter_num_spin_acq], - mutex_stats[mutex_counter_num_owner_switch], - mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time], - mutex_stats[mutex_counter_max_num_thds]); + " %10"FMTu32"\n", + mutex_stats_64[mutex_counter_num_ops], + mutex_stats_64[mutex_counter_num_wait], + mutex_stats_64[mutex_counter_num_spin_acq], + mutex_stats_64[mutex_counter_num_owner_switch], + mutex_stats_64[mutex_counter_total_wait_time], + mutex_stats_64[mutex_counter_max_wait_time], + mutex_stats_32[mutex_counter_max_num_thds]); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } @@ -329,7 +335,8 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), static void read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters], + uint32_t results_uint32_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; mutex_prof_arena_ind_t i; @@ -338,7 +345,7 @@ read_arena_mutex_stats(unsigned arena_ind, gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.mutexes", arena_mutex_names[i], #c); \ CTL_M2_GET(cmd, arena_ind, \ - (t *)&results[i][mutex_counter_##c], t); + (t *)&results_##t[i][mutex_counter_##c], t); MUTEX_PROF_COUNTERS #undef OP } @@ -346,7 +353,8 @@ MUTEX_PROF_COUNTERS static void mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], + const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t 
stats_uint32_t[mutex_prof_num_uint32_t_counters], bool first_mutex) { if (first_mutex) { /* Print title. */ @@ -364,7 +372,7 @@ mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, #define OP(c, t) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats[mutex_counter_##c]); + (t)stats_##t[mutex_counter_##c]); MUTEX_PROF_COUNTERS #undef OP malloc_cprintf(write_cb, cbopaque, "\n"); @@ -373,8 +381,9 @@ MUTEX_PROF_COUNTERS static void stats_arena_mutexes_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats); + uint64_t mutex_stats_64[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]; + read_arena_mutex_stats(arena_ind, mutex_stats_64, mutex_stats_32); /* Output mutex stats. */ if (json) { @@ -383,7 +392,7 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), last_mutex = mutex_prof_num_arena_mutexes - 1; for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], + arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], "\t\t\t\t\t", (i == last_mutex)); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", @@ -392,7 +401,7 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), mutex_prof_arena_ind_t i; for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], i == 0); + arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], i == 0); } } } @@ -1004,7 +1013,8 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void read_global_mutex_stats( - uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters], + uint32_t results_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; mutex_prof_global_ind_t i; @@ -1012,7 +1022,7 @@ read_global_mutex_stats( #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); + CTL_GET(cmd, (t *)&results_##t[i][mutex_counter_##c], t); MUTEX_PROF_COUNTERS #undef OP } @@ -1035,9 +1045,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; + uint64_t mutex_stats_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]; if (mutex) { - read_global_mutex_stats(mutex_stats); + read_global_mutex_stats(mutex_stats_uint64_t, mutex_stats_uint32_t); } if (have_background_thread) { @@ -1091,7 +1102,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], + global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], "\t\t\t\t", i == 
mutex_prof_num_global_mutexes - 1); } @@ -1118,7 +1129,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], + global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], i == 0); } } -- cgit v0.12 From 72bdbc35e3231db91def5f466d41778ee04d7e64 Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Tue, 2 Jan 2018 21:10:01 -0800 Subject: extent_t bitpacking logic refactoring --- include/jemalloc/internal/extent_structs.h | 72 +++++++++++++++--------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 89b49c7..4873b9e 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -86,42 +86,42 @@ struct extent_s { * serial number to both resulting adjacent extents. */ uint64_t e_bits; -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK \ - (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) - -#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS -#define EXTENT_BITS_SLAB_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) - -#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) -#define EXTENT_BITS_COMMITTED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) - -#define EXTENT_BITS_DUMPABLE_SHIFT (MALLOCX_ARENA_BITS + 2) -#define EXTENT_BITS_DUMPABLE_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_DUMPABLE_SHIFT) - -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 3) -#define EXTENT_BITS_ZEROED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) - -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 4) -#define EXTENT_BITS_STATE_MASK \ - ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) - -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 6) -#define EXTENT_BITS_SZIND_MASK \ - (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) - -#define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES) -#define EXTENT_BITS_NFREE_MASK \ - ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) - -#define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define 
EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; -- cgit v0.12 From 433c2edabc5c03ae069ac652857c05c673807d0c Mon Sep 17 00:00:00 2001 From: marxin Date: Tue, 2 Jan 2018 10:29:19 +0100 Subject: Disable JEMALLOC_HAVE_MADVISE_HUGE for arm* CPUs. --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index 7544f57..9432dc6 100644 --- a/configure.ac +++ b/configure.ac @@ -1863,9 +1863,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) +case "${host_cpu}" in + arm*) + ;; + *) if test "x${je_cv_thp}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) fi + ;; +esac fi dnl Enable transparent huge page support by default. -- cgit v0.12 From 78a87e4a80e9bf379c0dc660374173ef394252f6 Mon Sep 17 00:00:00 2001 From: Nehal J Wani Date: Sun, 31 Dec 2017 06:52:33 -0600 Subject: Make sure JE_CXXFLAGS_ADD uses CPP compiler All the invocations of AC_COMPILE_IFELSE inside JE_CXXFLAGS_ADD were running 'the compiler and compilation flags of the current language' which was always the C compiler and the CXXFLAGS were never being tested against a C++ compiler. This patch fixes this issue by temporarily changing the chosen compiler to C++ by pushing it over the stack and popping it immediately after the compilation check. --- configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.ac b/configure.ac index 9432dc6..231b6df 100644 --- a/configure.ac +++ b/configure.ac @@ -76,6 +76,7 @@ AC_MSG_CHECKING([whether compiler supports $1]) T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}" JE_APPEND_VS(CONFIGURE_CXXFLAGS, $1) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) +AC_LANG_PUSH([C++]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ @@ -87,6 +88,7 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( AC_MSG_RESULT([no]) [CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"] ) +AC_LANG_POP([C++]) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) -- cgit v0.12 From 91b247d311ce6837aa93d4315f5f7680abd8a11a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Dec 2017 11:19:50 -0800 Subject: In iallocztm, check lock rank only when not in reentrancy. 
From 433c2edabc5c03ae069ac652857c05c673807d0c Mon Sep 17 00:00:00 2001
From: marxin
Date: Tue, 2 Jan 2018 10:29:19 +0100
Subject: Disable JEMALLOC_HAVE_MADVISE_HUGE for arm* CPUs.

---
 configure.ac | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/configure.ac b/configure.ac
index 7544f57..9432dc6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1863,9 +1863,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then
   madvise((void *)0, 0, MADV_HUGEPAGE);
   madvise((void *)0, 0, MADV_NOHUGEPAGE);
 ], [je_cv_thp])
+case "${host_cpu}" in
+  arm*)
+	;;
+  *)
 if test "x${je_cv_thp}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
 fi
+	;;
+esac
 fi
 
 dnl Enable transparent huge page support by default.
-- 
cgit v0.12
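
JEMALLOC_HAVE_MADVISE_HUGE is the switch that lets jemalloc's page-management code issue MADV_HUGEPAGE/MADV_NOHUGEPAGE, so leaving it undefined on arm* simply compiles those calls out. A standalone sketch of that kind of guarded call (illustrative only, not jemalloc's pages.c; the mapping size and error handling are made up for the example):

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int
main(void) {
	size_t len = 2 * 1024 * 1024;
	void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	/* Only compiled in when configure decided MADV_HUGEPAGE is usable. */
	if (madvise(addr, len, MADV_HUGEPAGE) != 0) {
		perror("madvise");
	}
#endif
	munmap(addr, len);
	return EXIT_SUCCESS;
}
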
From 78a87e4a80e9bf379c0dc660374173ef394252f6 Mon Sep 17 00:00:00 2001
From: Nehal J Wani
Date: Sun, 31 Dec 2017 06:52:33 -0600
Subject: Make sure JE_CXXFLAGS_ADD uses CPP compiler

All the invocations of AC_COMPILE_IFELSE inside JE_CXXFLAGS_ADD were
running 'the compiler and compilation flags of the current language'
which was always the C compiler and the CXXFLAGS were never being tested
against a C++ compiler. This patch fixes this issue by temporarily
changing the chosen compiler to C++ by pushing it over the stack and
popping it immediately after the compilation check.
---
 configure.ac | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure.ac b/configure.ac
index 9432dc6..231b6df 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,7 @@ AC_MSG_CHECKING([whether compiler supports $1])
 T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}"
 JE_APPEND_VS(CONFIGURE_CXXFLAGS, $1)
 JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS)
+AC_LANG_PUSH([C++])
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 [[
 ]], [[
@@ -87,6 +88,7 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
               AC_MSG_RESULT([no])
               [CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"]
 )
+AC_LANG_POP([C++])
 JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS)
 ])
-- 
cgit v0.12

From 91b247d311ce6837aa93d4315f5f7680abd8a11a Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Fri, 22 Dec 2017 11:19:50 -0800
Subject: In iallocztm, check lock rank only when not in reentrancy.

---
 include/jemalloc/internal/jemalloc_internal_inlines_c.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index c54fc99..499ac1b 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -45,8 +45,10 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
 	assert(size != 0);
 	assert(!is_internal || tcache == NULL);
 	assert(!is_internal || arena == NULL || arena_is_auto(arena));
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
+	if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
+		witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+		    WITNESS_RANK_CORE, 0);
+	}
 
 	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
 	if (config_stats && is_internal && likely(ret != NULL)) {
-- 
cgit v0.12

From 41790f4fa475434ea84b8509b9a68e63d9a86f95 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Fri, 22 Dec 2017 11:22:16 -0800
Subject: Check tsdn_null before reading reentrancy level.

---
 include/jemalloc/internal/jemalloc_internal_inlines_c.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 499ac1b..c829ac6 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -111,7 +111,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
 	if (config_stats && is_internal) {
 		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
 	}
-	if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+	if (!is_internal && !tsdn_null(tsdn) &&
+	    tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
 		assert(tcache == NULL);
 	}
 	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
-- 
cgit v0.12

From ba5992fe9ac1708c812ec65bff3270bba17f1e1b Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Thu, 21 Dec 2017 16:17:45 -0800
Subject: Improve the fit for aligned allocation.

We compute the max size required to satisfy an alignment. However this
can be quite pessimistic, especially with frequent reuse (and combined
with state-based fragmentation). This commit adds one more fit step
specific to aligned allocations, searching in all potential fit size
classes.
---
 src/extent.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 61 insertions(+), 10 deletions(-)

diff --git a/src/extent.c b/src/extent.c
index bca703f..517780e 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -363,6 +363,43 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent,
 	    cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED);
 }
 
+/*
+ * Find an extent with size [min_size, max_size) to satisfy the alignment
+ * requirement.  For each size, try only the first extent in the heap.
+ */
+static extent_t *
+extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size,
+    size_t alignment) {
+	pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size));
+	pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size));
+
+	for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap,
+	    &extents_bitmap_info, (size_t)pind); i < pind_max; i =
+	    (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info,
+	    (size_t)i+1)) {
+		assert(i < NPSIZES);
+		assert(!extent_heap_empty(&extents->heaps[i]));
+		extent_t *extent = extent_heap_first(&extents->heaps[i]);
+		uintptr_t base = (uintptr_t)extent_base_get(extent);
+		size_t candidate_size = extent_size_get(extent);
+		assert(candidate_size >= min_size);
+
+		uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base,
+		    PAGE_CEILING(alignment));
+		if (base > next_align || base + candidate_size <= next_align) {
+			/* Overflow or not crossing the next alignment. */
+			continue;
+		}
+
+		size_t leadsize = next_align - base;
+		if (candidate_size - leadsize >= min_size) {
+			return extent;
+		}
+	}
+
+	return NULL;
+}
+
 /* Do any-best-fit extent selection, i.e. select any extent that best fits. */
 static extent_t *
 extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
@@ -424,12 +461,30 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
  */
 static extent_t *
 extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents,
-    size_t size) {
+    size_t esize, size_t alignment) {
 	malloc_mutex_assert_owner(tsdn, &extents->mtx);
 
-	return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena,
-	    extents, size) : extents_first_fit_locked(tsdn, arena, extents,
-	    size);
+	size_t max_size = esize + PAGE_CEILING(alignment) - PAGE;
+	/* Beware size_t wrap-around. */
+	if (max_size < esize) {
+		return NULL;
+	}
+
+	extent_t *extent = extents->delay_coalesce ?
+	    extents_best_fit_locked(tsdn, arena, extents, max_size) :
+	    extents_first_fit_locked(tsdn, arena, extents, max_size);
+
+	if (alignment > PAGE && extent == NULL) {
+		/*
+		 * max_size guarantees the alignment requirement but is rather
+		 * pessimistic.  Next we try to satisfy the aligned allocation
+		 * with sizes in [esize, max_size).
+		 */
+		extent = extents_fit_alignment(extents, esize, max_size,
+		    alignment);
+	}
+
+	return extent;
 }
 
 static bool
@@ -821,11 +876,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena,
 	}
 
 	size_t esize = size + pad;
-	size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE;
-	/* Beware size_t wrap-around. */
-	if (alloc_size < esize) {
-		return NULL;
-	}
 	malloc_mutex_lock(tsdn, &extents->mtx);
 	extent_hooks_assure_initialized(arena, r_extent_hooks);
 	extent_t *extent;
@@ -847,7 +897,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena,
 			extent_unlock(tsdn, unlock_extent);
 		}
 	} else {
-		extent = extents_fit_locked(tsdn, arena, extents, alloc_size);
+		extent = extents_fit_locked(tsdn, arena, extents, esize,
+		    alignment);
 	}
 	if (extent == NULL) {
 		malloc_mutex_unlock(tsdn, &extents->mtx);
-- 
cgit v0.12
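
The trade-off described in the commit message is easier to see with numbers: the pessimistic bound requests esize + PAGE_CEILING(alignment) - PAGE bytes, while the new fallback walks candidates in [esize, max_size) and accepts any whose first aligned address still leaves esize usable bytes. A small self-contained illustration of that check (PAGE, the sizes, and the base address below are made-up values, not taken from jemalloc):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE	((uintptr_t)4096)

/* Round x up to a multiple of align (align must be a power of two). */
static uintptr_t
align_up(uintptr_t x, uintptr_t align) {
	return (x + align - 1) & ~(align - 1);
}

int
main(void) {
	uintptr_t esize = 8 * PAGE;		/* requested size */
	uintptr_t alignment = 4 * PAGE;		/* requested alignment */

	/* Pessimistic bound: always satisfiable, but often oversized. */
	uintptr_t max_size = esize + alignment - PAGE;	/* 11 pages */

	/* A smaller cached extent can still work if its base is close to an
	 * alignment boundary. */
	uintptr_t base = 50 * PAGE;		/* hypothetical extent base */
	uintptr_t candidate_size = 10 * PAGE;	/* in [esize, max_size) */
	uintptr_t next_align = align_up(base, alignment);	/* 52 pages */
	uintptr_t leadsize = next_align - base;			/* 2 pages */

	assert(candidate_size >= esize && candidate_size < max_size);
	if (candidate_size - leadsize >= esize) {
		printf("fits: lead %zu pages, usable %zu pages\n",
		    (size_t)(leadsize / PAGE),
		    (size_t)((candidate_size - leadsize) / PAGE));
	}
	return 0;
}

Here a 10-page extent satisfies an 8-page request at 4-page alignment even though the pessimistic bound would have asked for 11 pages, which is exactly the reuse the extra fit step recovers.
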
From f78d4ca3fbff6cab0c704c787706a53ddafcbe13 Mon Sep 17 00:00:00 2001
From: Christopher Ferris
Date: Fri, 22 Sep 2017 12:24:50 -0700
Subject: Modify configure to determine return value of strerror_r.

On glibc and Android's bionic, strerror_r returns char* when
_GNU_SOURCE is defined. Add a configure check for this rather than
assume glibc is the only libc that behaves this way.
---
 configure.ac                                          | 19 +++++++++++++++++++
 include/jemalloc/internal/jemalloc_internal_defs.h.in |  5 +++++
 src/malloc_io.c                                       |  2 +-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 231b6df..b58540e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2072,6 +2072,25 @@ if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ])
 fi
 
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-D_GNU_SOURCE])
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([strerror_r returns char with gnu source], [
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+], [
+  char *buffer = (char *) malloc(100);
+  char *error = strerror_r(EINVAL, buffer, 100);
+  printf("%s\n", error);
+], [je_cv_strerror_r_returns_char_with_gnu_source])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ])
+fi
+
 dnl ============================================================================
 dnl Check for typedefs, structures, and compiler characteristics.
 AC_HEADER_STDBOOL
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index aadfbed..8dad9a1 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -358,4 +358,9 @@
 /* If defined, jemalloc takes the malloc/free/etc. symbol names. */
 #undef JEMALLOC_IS_MALLOC
 
+/*
+ * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
+ */
+#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
+
 #endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/src/malloc_io.c b/src/malloc_io.c
index 6b99afc..fd27bd1 100644
--- a/src/malloc_io.c
+++ b/src/malloc_io.c
@@ -111,7 +111,7 @@ buferror(int err, char *buf, size_t buflen) {
 	FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
 	    (LPSTR)buf, (DWORD)buflen, NULL);
 	return 0;
-#elif defined(__GLIBC__) && defined(_GNU_SOURCE)
+#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE)
 	char *b = strerror_r(err, buf, buflen);
 	if (b != buf) {
 		strncpy(buf, b, buflen);
-- 
cgit v0.12
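
The configure check exists because, with _GNU_SOURCE, glibc and bionic expose the GNU strerror_r, which returns a char * that may point at a static message instead of filling the caller's buffer, whereas the POSIX/XSI variant returns an int and always writes into the buffer. A small standalone program showing the GNU-variant handling that buferror() relies on (assumes a GNU-flavored libc with _GNU_SOURCE; this is an illustration, not jemalloc code):

#define _GNU_SOURCE	/* Request char *strerror_r(int, char *, size_t). */
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void) {
	char buf[64];
	/* GNU variant: the result may be buf, or a pointer to a static
	 * message that was never copied into buf. */
	char *msg = strerror_r(EINVAL, buf, sizeof(buf));
	if (msg != buf) {
		/* Copy into the caller's buffer, as buferror() does. */
		strncpy(buf, msg, sizeof(buf));
		buf[sizeof(buf) - 1] = '\0';
	}
	printf("%s\n", buf);
	return 0;
}
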