author	Jason Evans <je@facebook.com>	2010-10-21 03:52:00 (GMT)
committer	Jason Evans <je@facebook.com>	2010-10-21 03:52:00 (GMT)
commit	81b4e6eb6f06cac048e3743787a70676f1534269 (patch)
tree	c48f69c8b238c9b6424cf858cf404dc6ee1aa739
parent	4d6a134e13e1e9705faff6f5dc6ca90d416aa0a4 (diff)
Fix a heap profiling regression.
Call prof_ctx_set() in all paths through prof_{m,re}alloc(). Inline arena_prof_ctx_get().
-rw-r--r--	jemalloc/include/jemalloc/internal/arena.h	107
-rw-r--r--	jemalloc/include/jemalloc/internal/prof.h	5
-rw-r--r--	jemalloc/src/arena.c	99
3 files changed, 110 insertions, 101 deletions
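The regression fixed here: prof_malloc() and prof_realloc() could return early on the non-sampled path without storing any profiling context for the region, so a later lookup read whatever stale value was left in the ctx slot. Below is a minimal editorial sketch of that failure class, not jemalloc code; the demo_* names are hypothetical, and only the (prof_ctx_t *)(uintptr_t)1U sentinel value comes from the diff that follows.

#include <stdint.h>
#include <stdio.h>

/* Sentinel meaning "allocation was not sampled" (mirrors the
 * (prof_ctx_t *)(uintptr_t)1U value stored in the diff below). */
#define DEMO_NOT_SAMPLED ((void *)(uintptr_t)1U)

static void *demo_ctx_slot;	/* stands in for the per-region prof_ctx slot */

static void
demo_prof_malloc(int sampled)
{
	if (!sampled) {
		/*
		 * The fix: store the sentinel even on this early-return
		 * path.  Before the fix, the slot kept its previous value.
		 */
		demo_ctx_slot = DEMO_NOT_SAMPLED;
		return;
	}
	demo_ctx_slot = &demo_ctx_slot;	/* pretend: a real prof_ctx_t * */
}

int
main(void)
{
	demo_prof_malloc(1);	/* sampled: slot holds a real ctx pointer */
	demo_prof_malloc(0);	/* not sampled: slot must be reset to 1 */
	printf("ctx slot after non-sampled allocation: %p\n", demo_ctx_slot);
	return (0);
}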
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 893f070..240d96b 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -444,7 +444,6 @@ size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
-prof_ctx_t *arena_prof_ctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
@@ -467,10 +466,116 @@ bool arena_boot(void);
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
+unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin,
+    const void *ptr, size_t size);
+# ifdef JEMALLOC_PROF
+prof_ctx_t *arena_prof_ctx_get(const void *ptr);
+# endif
void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
+    size_t size)
+{
+	unsigned shift, diff, regind;
+
+	assert(run->magic == ARENA_RUN_MAGIC);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible. Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+#define SIZE_INV_SHIFT 21
+#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	assert(diff == regind * size);
+	assert(regind < bin->nregs);
+
+	return (regind);
+}
+
+#ifdef JEMALLOC_PROF
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+	prof_ctx_t *ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote)
+			ret = (prof_ctx_t *)(uintptr_t)1U;
+		else {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			unsigned regind;
+
+			assert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
+			ret = *(prof_ctx_t **)((uintptr_t)run +
+			    bin->ctx0_offset + (regind *
+			    sizeof(prof_ctx_t *)));
+		}
+	} else
+		ret = chunk->map[pageind-map_bias].prof_ctx;
+
+	return (ret);
+}
+#endif
+
JEMALLOC_INLINE void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
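For reference, the SIZE_INV trick in arena_run_regind() above replaces a division by a variable with a multiply and a shift: precompute floor(2^21 / D) + 1 for each divisor D, then X / D becomes (X * inv) >> 21, which is exact as long as the quotient stays small enough that the rounding error never reaches the integer part. The standalone sketch below checks this over a conservative input range; the bounds are illustrative editorial choices, not jemalloc's actual limits.

#include <assert.h>
#include <stdio.h>

#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)

int
main(void)
{
	unsigned size, regind;

	for (size = 3; size <= 31; size++) {
		unsigned inv = SIZE_INV(size);

		/*
		 * Keep regind small enough that diff * inv fits in 32 bits
		 * and the rounding error stays below 2^SIZE_INV_SHIFT.
		 */
		for (regind = 0; regind < 1024; regind++) {
			unsigned diff = regind * size;	/* exact multiple */
			assert(((diff * inv) >> SIZE_INV_SHIFT) == regind);
		}
	}
	printf("multiply-and-shift agrees with exact division\n");
	return (0);
}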
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index a6bd797..51d0487 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -389,7 +389,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{

	assert(ptr != NULL);
-	assert(size == s2u(size));
+	assert(size == isalloc(ptr));

	if (opt_lg_prof_sample != 0) {
		if (prof_sample_accum_update(size)) {
@@ -401,6 +401,7 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
			 * prof_alloc_prep() and prof_malloc().
			 */
			assert(false);
+			prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
			return;
		}
	}
@@ -438,6 +439,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);

	if (ptr != NULL) {
+		assert(size == isalloc(ptr));
		if (opt_lg_prof_sample != 0) {
			if (prof_sample_accum_update(size)) {
				/*
@@ -448,6 +450,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
				 * its actual size was insufficient to cross
				 * the sample threshold.
				 */
+				prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
				return;
			}
		}
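The (prof_ctx_t *)(uintptr_t)1U stored above is jemalloc's "not sampled" sentinel: addresses 0 and 1 can never be real context objects, so the deallocation side can tell a live profiling context from the sentinel with a single compare. A hedged sketch of that convention follows; demo_prof_free and the demo types are hypothetical, and only the sentinel value is taken from the diff.

#include <stdint.h>
#include <stdio.h>

typedef struct demo_prof_ctx_s demo_prof_ctx_t;	/* opaque stand-in */

static void
demo_prof_free(const void *ptr, demo_prof_ctx_t *ctx)
{
	/*
	 * A ctx value greater than 1 means the allocation was sampled
	 * and carries live profiling state to update on free.
	 */
	if ((uintptr_t)ctx > (uintptr_t)1U)
		printf("%p: sampled; update its ctx counters\n", ptr);
	else
		printf("%p: not sampled; nothing to record\n", ptr);
}

int
main(void)
{
	int x;

	demo_prof_free(&x, (demo_prof_ctx_t *)(uintptr_t)1U);
	return (0);
}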
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index a54a090..2a8ea62 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -1766,105 +1766,6 @@ arena_salloc_demote(const void *ptr)
	return (ret);
}

-static inline unsigned
-arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
-    size_t size)
-{
-	unsigned shift, diff, regind;
-
-	assert(run->magic == ARENA_RUN_MAGIC);
-
-	/*
-	 * Avoid doing division with a variable divisor if possible. Using
-	 * actual division here can reduce allocator throughput by over 20%!
-	 */
-	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
-
-	/* Rescale (factor powers of 2 out of the numerator and denominator). */
-	shift = ffs(size) - 1;
-	diff >>= shift;
-	size >>= shift;
-
-	if (size == 1) {
-		/* The divisor was a power of 2. */
-		regind = diff;
-	} else {
-		/*
-		 * To divide by a number D that is not a power of two we
-		 * multiply by (2^21 / D) and then right shift by 21 positions.
-		 *
-		 *   X / D
-		 *
-		 * becomes
-		 *
-		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
-		 *
-		 * We can omit the first three elements, because we never
-		 * divide by 0, and 1 and 2 are both powers of two, which are
-		 * handled above.
-		 */
-#define SIZE_INV_SHIFT 21
-#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
-		static const unsigned size_invs[] = {
-		    SIZE_INV(3),
-		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
-		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
-		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
-		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
-		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
-		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
-		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
-		};
-
-		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
-			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
-		else
-			regind = diff / size;
-#undef SIZE_INV
-#undef SIZE_INV_SHIFT
-	}
-	assert(diff == regind * size);
-	assert(regind < bin->nregs);
-
-	return (regind);
-}
-
-prof_ctx_t *
-arena_prof_ctx_get(const void *ptr)
-{
-	prof_ctx_t *ret;
-	arena_chunk_t *chunk;
-	size_t pageind, mapbits;
-
-	assert(ptr != NULL);
-	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
-	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		if (prof_promote)
-			ret = (prof_ctx_t *)(uintptr_t)1U;
-		else {
-			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-			    PAGE_SHIFT));
-			arena_bin_t *bin = run->bin;
-			unsigned regind;
-
-			assert(run->magic == ARENA_RUN_MAGIC);
-			regind = arena_run_regind(run, bin, ptr, bin->reg_size);
-			ret = *(prof_ctx_t **)((uintptr_t)run +
-			    bin->ctx0_offset + (regind *
-			    sizeof(prof_ctx_t *)));
-		}
-	} else
-		ret = chunk->map[pageind-map_bias].prof_ctx;
-
-	return (ret);
-}
-
void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{