From f35213bae4ee6294a0743607637f9be8989622f1 Mon Sep 17 00:00:00 2001
From: Qi Wang <interwq@gwu.edu>
Date: Tue, 11 Apr 2017 14:56:43 -0700
Subject: Pass dalloc_ctx down the sdalloc path.

This avoids redundant rtree lookups: when profiling is enabled,
isfree() reads szind and slab from the rtree anyway, so it can pass
the resulting dalloc_ctx_t down through isdalloct() and
arena_sdalloc() rather than having arena_sdalloc() repeat the lookup.
Callers without a context (the ralloc paths) pass NULL, in which case
arena_sdalloc() falls back to a single lookup of its own.
---
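A minimal sketch of the pattern applied here -- perform the metadata
lookup once and thread the result down the deallocation path instead
of re-querying the rtree at each level.  metadata_lookup(), sdalloc(),
sized_free(), and the simplified dalloc_ctx_t below are illustrative
stand-ins, not jemalloc's actual API:

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct {
        unsigned szind; /* size class index */
        bool slab;      /* small (slab-backed) allocation? */
    } dalloc_ctx_t;

    /* Illustrative stand-in for the rtree metadata lookup. */
    static void
    metadata_lookup(const void *ptr, dalloc_ctx_t *ctx) {
        (void)ptr;
        ctx->szind = 0;
        ctx->slab = true;
    }

    /*
     * Leaf of the deallocation path: reuses the caller's ctx when one
     * is supplied, and falls back to a single lookup otherwise.
     */
    void
    sdalloc(void *ptr, dalloc_ctx_t *dalloc_ctx) {
        dalloc_ctx_t local_ctx;

        if (dalloc_ctx == NULL) {
            metadata_lookup(ptr, &local_ctx);
            dalloc_ctx = &local_ctx;
        }
        /* ... free ptr using dalloc_ctx->szind / dalloc_ctx->slab ... */
    }

    /*
     * A caller that needs the metadata anyway (as isfree() does when
     * profiling is enabled) looks it up once and passes it down; paths
     * that never computed it (the ralloc paths) simply pass NULL.
     */
    void
    sized_free(void *ptr) {
        dalloc_ctx_t ctx;

        metadata_lookup(ptr, &ctx);
        /* ... record profiling info using ctx ... */
        sdalloc(ptr, &ctx);
    }

The NULL fallback keeps the cold paths simple while the hot free path,
which has already paid for the lookup, gets the metadata for free.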
 include/jemalloc/internal/arena_inlines_b.h        | 38 +++++++++++++---------
 .../internal/jemalloc_internal_inlines_c.h         | 10 +++---
 src/arena.c                                        |  2 +-
 src/jemalloc.c                                     | 13 ++++++--
 src/large.c                                        |  2 +-
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index 96889c1..382289e 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -19,7 +19,7 @@ void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     dalloc_ctx_t *dalloc_ctx, bool slow_path);
 void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size);
 void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
-    bool slow_path);
+    dalloc_ctx_t *dalloc_ctx, bool slow_path);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
@@ -293,7 +293,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
 
 JEMALLOC_ALWAYS_INLINE void
 arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
-    bool slow_path) {
+    dalloc_ctx_t *dalloc_ctx, bool slow_path) {
 	assert(!tsdn_null(tsdn) || tcache == NULL);
 	assert(ptr != NULL);
 	assert(size <= LARGE_MAXCLASS);
@@ -305,7 +305,22 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 
 	szind_t szind;
 	bool slab;
-	if (!config_prof || !opt_prof) {
+	UNUSED dalloc_ctx_t local_ctx;
+	if (config_prof && opt_prof) {
+		if (dalloc_ctx == NULL) {
+			/* Uncommon case; ideally this would be a static check. */
+			rtree_ctx_t rtree_ctx_fallback;
+			rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+			    &rtree_ctx_fallback);
+			rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+			    (uintptr_t)ptr, true, &local_ctx.szind,
+			    &local_ctx.slab);
+			assert(local_ctx.szind == size2index(size));
+			dalloc_ctx = &local_ctx;
+		}
+		slab = dalloc_ctx->slab;
+		szind = dalloc_ctx->szind;
+	} else {
 		/*
 		 * There is no risk of being confused by a promoted sampled
 		 * object, so base szind and slab on the given size.
@@ -314,21 +329,14 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
 		slab = (szind < NBINS);
 	}
 
-	if ((config_prof && opt_prof) || config_debug) {
+	if (config_debug) {
 		rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
-
 		rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
 		    (uintptr_t)ptr, true, &szind, &slab);
-
-		assert(szind == size2index(size));
-		assert((config_prof && opt_prof) || slab == (szind < NBINS));
-
-		if (config_debug) {
-			extent_t *extent = rtree_extent_read(tsdn,
-			    &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
-			assert(szind == extent_szind_get(extent));
-			assert(slab == extent_slab_get(extent));
-		}
+		extent_t *extent = rtree_extent_read(tsdn,
+		    &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+		assert(szind == extent_szind_get(extent));
+		assert(slab == extent_slab_get(extent));
 	}
 
 	if (likely(slab)) {
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index 05debd2..4fb3424 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -18,7 +18,7 @@ void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
     dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path);
 void idalloc(tsd_t *tsd, void *ptr);
 void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
-    bool slow_path);
+    dalloc_ctx_t *dalloc_ctx, bool slow_path);
 void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
     size_t extra, size_t alignment, bool zero, tcache_t *tcache,
     arena_t *arena);
@@ -129,10 +129,10 @@ idalloc(tsd_t *tsd, void *ptr) {
 }
 
 JEMALLOC_ALWAYS_INLINE void
-isdalloct(tsdn_t *tsdn, void *ptr, size_t size,
-    tcache_t *tcache, bool slow_path) {
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    dalloc_ctx_t *dalloc_ctx, bool slow_path) {
 	witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0);
-	arena_sdalloc(tsdn, ptr, size, tcache, slow_path);
+	arena_sdalloc(tsdn, ptr, size, tcache, dalloc_ctx, slow_path);
 }
 
 JEMALLOC_ALWAYS_INLINE void *
@@ -168,7 +168,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 	memcpy(p, ptr, copysize);
-	isdalloct(tsdn, ptr, oldsize, tcache, true);
+	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
 	return p;
 }
 
diff --git a/src/arena.c b/src/arena.c
index 5d313e3..16728b3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1752,7 +1752,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
 
 	size_t copysize = (usize < oldsize) ? usize : oldsize;
 	memcpy(ret, ptr, copysize);
-	isdalloct(tsdn, ptr, oldsize, tcache, true);
+	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
 	return ret;
 }
 
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 77ee857..e71949a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -2083,17 +2083,26 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 	assert(ptr != NULL);
 	assert(malloc_initialized() || IS_INITIALIZER);
 
+	dalloc_ctx_t dalloc_ctx, *ctx;
 	if (config_prof && opt_prof) {
+		rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd);
+		rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx,
+		    (uintptr_t)ptr, true, &dalloc_ctx.szind, &dalloc_ctx.slab);
+		assert(dalloc_ctx.szind == size2index(usize));
 		prof_free(tsd, ptr, usize);
+		ctx = &dalloc_ctx;
+	} else {
+		ctx = NULL;
 	}
+
 	if (config_stats) {
 		*tsd_thread_deallocatedp_get(tsd) += usize;
 	}
 
 	if (likely(!slow_path)) {
-		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, false);
+		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false);
 	} else {
-		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, true);
+		isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true);
 	}
 }
 
diff --git a/src/large.c b/src/large.c
index 18987c1..3b53eb3 100644
--- a/src/large.c
+++ b/src/large.c
@@ -304,7 +304,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
 
 	size_t copysize = (usize < oldusize) ? usize : oldusize;
 	memcpy(ret, extent_addr_get(extent), copysize);
-	isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, true);
+	isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true);
 	return ret;
 }
 
-- 
cgit v0.12