From 5aa46f027df42636d4aa1fb70d1078a6c5f96420 Mon Sep 17 00:00:00 2001
From: Qi Wang <interwq@gwu.edu>
Date: Thu, 20 Apr 2017 15:19:02 -0700
Subject: Bypass extent tracking for auto arenas.

Tracking extents is required by arena_reset.  To support this, the extent
linkage was used for tracking 1) large allocations, and 2) full slabs.  However,
modifying the extent linkage could be an expensive operation as it likely incurs
cache misses.  Since we forbid arena_reset on auto arenas, let's bypass the
linkage operations for auto arenas.
---
 .../internal/jemalloc_internal_inlines_a.h         |  1 +
 .../internal/jemalloc_internal_inlines_b.h         |  6 ++++
 .../internal/jemalloc_internal_inlines_c.h         |  9 ++----
 src/arena.c                                        | 33 ++++++++++++++--------
 src/ctl.c                                          |  7 +----
 src/large.c                                        | 25 ++++++++++------
 6 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index 0d922f1..38fa3c7 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -24,6 +24,7 @@ size_t sa2u(size_t size, size_t alignment);
 arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal);
 arena_t *arena_choose(tsd_t *tsd, arena_t *arena);
 arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena);
+bool arena_is_auto(arena_t *arena);
 arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind,
     bool refresh_if_missing);
 arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing);
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
index f22708a..ab54a59 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -70,6 +70,12 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) {
 	return arena_choose_impl(tsd, arena, true);
 }
 
+JEMALLOC_INLINE bool
+arena_is_auto(arena_t *arena) {
+	assert(narenas_auto > 0);
+	return (arena_ind_get(arena) < narenas_auto);
+}
+
 JEMALLOC_ALWAYS_INLINE extent_t *
 iealloc(tsdn_t *tsdn, const void *ptr) {
 	rtree_ctx_t rtree_ctx_fallback;
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 8c79381..70ac666 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -54,8 +54,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
 
 	assert(size != 0);
 	assert(!is_internal || tcache == NULL);
-	assert(!is_internal || arena == NULL || arena_ind_get(arena) <
-	    narenas_auto);
+	assert(!is_internal || arena == NULL || arena_is_auto(arena));
 	witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
 
 	ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
@@ -79,8 +78,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
 	assert(usize != 0);
 	assert(usize == sa2u(usize, alignment));
 	assert(!is_internal || tcache == NULL);
-	assert(!is_internal || arena == NULL || arena_ind_get(arena) <
-	    narenas_auto);
+	assert(!is_internal || arena == NULL || arena_is_auto(arena));
 	witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
 
 	ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
@@ -113,8 +111,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
     bool is_internal, bool slow_path) {
 	assert(ptr != NULL);
 	assert(!is_internal || tcache == NULL);
-	assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) <
-	    narenas_auto);
+	assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
 	witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
 	if (config_stats && is_internal) {
 		arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
diff --git a/src/arena.c b/src/arena.c
index bb45a90..94a4b5e 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1032,13 +1032,24 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) {
 }
 
 static void
-arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) {
+arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) {
 	assert(extent_nfree_get(slab) == 0);
+	/*
+	 * Tracking extents is required by arena_reset, which is not allowed
+	 * for auto arenas.  Bypass this step to avoid touching the extent
+	 * linkage (often results in cache misses) for auto arenas.
+	 */
+	if (arena_is_auto(arena)) {
+		return;
+	}
 	extent_list_append(&bin->slabs_full, slab);
 }
 
 static void
-arena_bin_slabs_full_remove(arena_bin_t *bin, extent_t *slab) {
+arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) {
+	if (arena_is_auto(arena)) {
+		return;
+	}
 	extent_list_remove(&bin->slabs_full, slab);
 }
 
@@ -1106,7 +1117,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) {
 		}
 		for (slab = extent_list_first(&bin->slabs_full); slab != NULL;
 		    slab = extent_list_first(&bin->slabs_full)) {
-			arena_bin_slabs_full_remove(bin, slab);
+			arena_bin_slabs_full_remove(arena, bin, slab);
 			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 			arena_slab_dalloc(tsd_tsdn(tsd), arena, slab);
 			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
@@ -1285,8 +1296,8 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin,
 	extent_t *slab;
 
 	bin_info = &arena_bin_info[binind];
-	if (bin->slabcur != NULL) {
-		arena_bin_slabs_full_insert(bin, bin->slabcur);
+	if (!arena_is_auto(arena) && bin->slabcur != NULL) {
+		arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
 		bin->slabcur = NULL;
 	}
 	slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind);
@@ -1319,7 +1330,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin,
 			return ret;
 		}
 
-		arena_bin_slabs_full_insert(bin, bin->slabcur);
+		arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
 		bin->slabcur = NULL;
 	}
 
@@ -1559,7 +1570,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
 }
 
 static void
-arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) {
+arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) {
 	/* Dissociate slab from bin. */
 	if (slab == bin->slabcur) {
 		bin->slabcur = NULL;
@@ -1573,7 +1584,7 @@ arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) {
 		 * into the non-full slabs heap.
 		 */
 		if (bin_info->nregs == 1) {
-			arena_bin_slabs_full_remove(bin, slab);
+			arena_bin_slabs_full_remove(arena, bin, slab);
 		} else {
 			arena_bin_slabs_nonfull_remove(bin, slab);
 		}
@@ -1611,7 +1622,7 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
 		if (extent_nfree_get(bin->slabcur) > 0) {
 			arena_bin_slabs_nonfull_insert(bin, bin->slabcur);
 		} else {
-			arena_bin_slabs_full_insert(bin, bin->slabcur);
+			arena_bin_slabs_full_insert(arena, bin, bin->slabcur);
 		}
 		bin->slabcur = slab;
 		if (config_stats) {
@@ -1637,10 +1648,10 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab,
 	arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr);
 	unsigned nfree = extent_nfree_get(slab);
 	if (nfree == bin_info->nregs) {
-		arena_dissociate_bin_slab(slab, bin);
+		arena_dissociate_bin_slab(arena, slab, bin);
 		arena_dalloc_bin_slab(tsdn, arena, slab, bin);
 	} else if (nfree == 1 && slab != bin->slabcur) {
-		arena_bin_slabs_full_remove(bin, slab);
+		arena_bin_slabs_full_remove(arena, bin, slab);
 		arena_bin_lower_slab(tsdn, arena, slab, bin);
 	}
 
diff --git a/src/ctl.c b/src/ctl.c
index 069e535..1b0ee05 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -1846,13 +1846,8 @@ arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen,
 	WRITEONLY();
 	MIB_UNSIGNED(*arena_ind, 1);
 
-	if (*arena_ind < narenas_auto) {
-		ret = EFAULT;
-		goto label_return;
-	}
-
 	*arena = arena_get(tsd_tsdn(tsd), *arena_ind, false);
-	if (*arena == NULL) {
+	if (*arena == NULL || arena_is_auto(*arena)) {
 		ret = EFAULT;
 		goto label_return;
 	}
diff --git a/src/large.c b/src/large.c
index 629656d..36e8be9 100644
--- a/src/large.c
+++ b/src/large.c
@@ -46,10 +46,13 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
 		return NULL;
 	}
 
-	/* Insert extent into large. */
-	malloc_mutex_lock(tsdn, &arena->large_mtx);
-	extent_list_append(&arena->large, extent);
-	malloc_mutex_unlock(tsdn, &arena->large_mtx);
+	/* See comments in arena_bin_slabs_full_insert(). */
+	if (!arena_is_auto(arena)) {
+		/* Insert extent into large. */
+		malloc_mutex_lock(tsdn, &arena->large_mtx);
+		extent_list_append(&arena->large, extent);
+		malloc_mutex_unlock(tsdn, &arena->large_mtx);
+	}
 	if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
 		prof_idump(tsdn);
 	}
@@ -318,16 +321,20 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
 static void
 large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
     bool junked_locked) {
-
 	if (!junked_locked) {
-		malloc_mutex_lock(tsdn, &arena->large_mtx);
-		extent_list_remove(&arena->large, extent);
-		malloc_mutex_unlock(tsdn, &arena->large_mtx);
+		/* See comments in arena_bin_slabs_full_insert(). */
+		if (!arena_is_auto(arena)) {
+			malloc_mutex_lock(tsdn, &arena->large_mtx);
+			extent_list_remove(&arena->large, extent);
+			malloc_mutex_unlock(tsdn, &arena->large_mtx);
+		}
 		large_dalloc_maybe_junk(extent_addr_get(extent),
 		    extent_usize_get(extent));
 	} else {
 		malloc_mutex_assert_owner(tsdn, &arena->large_mtx);
-		extent_list_remove(&arena->large, extent);
+		if (!arena_is_auto(arena)) {
+			extent_list_remove(&arena->large, extent);
+		}
 	}
 	arena_extent_dalloc_large_prep(tsdn, arena, extent);
 }
-- 
cgit v0.12