summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jason Evans <jasone@canonware.com> 2013-12-16 00:21:30 (GMT)
committer: Jason Evans <jasone@canonware.com> 2013-12-16 05:57:02 (GMT)
commit: 665769357cd77b74e00a146f196fff19243b33c4 (patch)
tree: c1a90fa54fa4a6f53ba137aa70706c9c6b7d9415
parent: 5a658b9c7517d62fa39759f5ff6119d26dfc4cb7 (diff)
download: jemalloc-665769357cd77b74e00a146f196fff19243b33c4.zip
jemalloc-665769357cd77b74e00a146f196fff19243b33c4.tar.gz
jemalloc-665769357cd77b74e00a146f196fff19243b33c4.tar.bz2
Optimize arena_prof_ctx_set().
Refactor such that arena_prof_ctx_set() receives usize as an argument, and use it to determine whether to handle ptr as a small region, rather than reading the chunk page map.
-rw-r--r-- include/jemalloc/internal/arena.h | 24
-rw-r--r-- include/jemalloc/internal/prof.h | 52
-rw-r--r-- src/jemalloc.c | 101
3 files changed, 99 insertions, 78 deletions
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index 075c263..28540a4 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -474,7 +474,7 @@ size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr);
prof_ctx_t *arena_prof_ctx_get(const void *ptr);
-void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t arena_salloc(const void *ptr, bool demote);
void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
@@ -886,7 +886,7 @@ arena_prof_ctx_get(const void *ptr)
}
JEMALLOC_INLINE void
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
@@ -899,7 +899,14 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+
+ if (usize > SMALL_MAXCLASS || (prof_promote &&
+ ((uintptr_t)ctx != (uintptr_t)1U || ((mapbits & CHUNK_MAP_LARGE) !=
+ 0)))) {
+ assert((mapbits & CHUNK_MAP_LARGE) != 0);
+ arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+ } else {
+ assert((mapbits & CHUNK_MAP_LARGE) == 0);
if (prof_promote == false) {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
@@ -912,12 +919,11 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
bin_info = &arena_bin_info[binind];
regind = arena_run_regind(run, bin_info, ptr);
- *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
- + (regind * sizeof(prof_ctx_t *)))) = ctx;
- } else
- assert((uintptr_t)ctx == (uintptr_t)1U);
- } else
- arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+ *((prof_ctx_t **)((uintptr_t)run +
+ bin_info->ctx0_offset + (regind * sizeof(prof_ctx_t
+ *)))) = ctx;
+ }
+ }
}
JEMALLOC_ALWAYS_INLINE void *
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 28ad37a..8b24099 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -289,11 +289,11 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+void prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx);
bool prof_sample_accum_update(size_t size);
-void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
- size_t old_size, prof_ctx_t *old_ctx);
+void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt);
+void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
+ size_t old_usize, prof_ctx_t *old_ctx);
void prof_free(const void *ptr, size_t size);
#endif
@@ -386,7 +386,7 @@ prof_ctx_get(const void *ptr)
}
JEMALLOC_INLINE void
-prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
@@ -396,7 +396,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- arena_prof_ctx_set(ptr, ctx);
+ arena_prof_ctx_set(ptr, usize, ctx);
} else
huge_prof_ctx_set(ptr, ctx);
}
@@ -431,20 +431,20 @@ prof_sample_accum_update(size_t size)
}
JEMALLOC_INLINE void
-prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
+prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
{
cassert(config_prof);
assert(ptr != NULL);
- assert(size == isalloc(ptr, true));
+ assert(usize == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(size)) {
+ if (prof_sample_accum_update(usize)) {
/*
* Don't sample. For malloc()-like allocation, it is
* always possible to tell in advance how large an
* object's usable size will be, so there should never
- * be a difference between the size passed to
+ * be a difference between the usize passed to
* PROF_ALLOC_PREP() and prof_malloc().
*/
assert((uintptr_t)cnt == (uintptr_t)1U);
@@ -452,17 +452,17 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
+ prof_ctx_set(ptr, usize, cnt->ctx);
cnt->epoch++;
/*********/
mb_write();
/*********/
cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
+ cnt->cnts.curbytes += usize;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
+ cnt->cnts.accumbytes += usize;
}
/*********/
mb_write();
@@ -472,12 +472,12 @@ prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
mb_write();
/*********/
} else
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+ prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
}
JEMALLOC_INLINE void
-prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
- size_t old_size, prof_ctx_t *old_ctx)
+prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt,
+ size_t old_usize, prof_ctx_t *old_ctx)
{
prof_thr_cnt_t *told_cnt;
@@ -485,15 +485,15 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) {
- assert(size == isalloc(ptr, true));
+ assert(usize == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
- if (prof_sample_accum_update(size)) {
+ if (prof_sample_accum_update(usize)) {
/*
- * Don't sample. The size passed to
+ * Don't sample. The usize passed to
* PROF_ALLOC_PREP() was larger than what
* actually got allocated, so a backtrace was
* captured for this allocation, even though
- * its actual size was insufficient to cross
+ * its actual usize was insufficient to cross
* the sample threshold.
*/
cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
@@ -510,7 +510,7 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
*/
malloc_mutex_lock(old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
- old_ctx->cnt_merged.curbytes -= old_size;
+ old_ctx->cnt_merged.curbytes -= old_usize;
malloc_mutex_unlock(old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
@@ -520,23 +520,23 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U) {
- prof_ctx_set(ptr, cnt->ctx);
+ prof_ctx_set(ptr, usize, cnt->ctx);
cnt->epoch++;
} else if (ptr != NULL)
- prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+ prof_ctx_set(ptr, usize, (prof_ctx_t *)(uintptr_t)1U);
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
told_cnt->cnts.curobjs--;
- told_cnt->cnts.curbytes -= old_size;
+ told_cnt->cnts.curbytes -= old_usize;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
+ cnt->cnts.curbytes += usize;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
+ cnt->cnts.accumbytes += usize;
}
}
/*********/
diff --git a/src/jemalloc.c b/src/jemalloc.c
index f8c8119..b8a4fb0 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1105,7 +1105,7 @@ je_realloc(void *ptr, size_t size)
{
void *ret;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
- size_t old_size = 0;
+ size_t old_usize = 0;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL);
@@ -1115,16 +1115,16 @@ je_realloc(void *ptr, size_t size)
/* realloc(ptr, 0) is equivalent to free(p). */
assert(malloc_initialized || IS_INITIALIZER);
if (config_prof) {
- old_size = isalloc(ptr, true);
+ old_usize = isalloc(ptr, true);
if (config_valgrind && opt_valgrind)
old_rzsize = p2rz(ptr);
} else if (config_stats) {
- old_size = isalloc(ptr, false);
+ old_usize = isalloc(ptr, false);
if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_size);
+ old_rzsize = u2rz(old_usize);
} else if (config_valgrind && opt_valgrind) {
- old_size = isalloc(ptr, false);
- old_rzsize = u2rz(old_size);
+ old_usize = isalloc(ptr, false);
+ old_rzsize = u2rz(old_usize);
}
if (config_prof && opt_prof) {
old_ctx = prof_ctx_get(ptr);
@@ -1142,16 +1142,16 @@ je_realloc(void *ptr, size_t size)
malloc_thread_init();
if (config_prof) {
- old_size = isalloc(ptr, true);
+ old_usize = isalloc(ptr, true);
if (config_valgrind && opt_valgrind)
old_rzsize = p2rz(ptr);
} else if (config_stats) {
- old_size = isalloc(ptr, false);
+ old_usize = isalloc(ptr, false);
if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_size);
+ old_rzsize = u2rz(old_usize);
} else if (config_valgrind && opt_valgrind) {
- old_size = isalloc(ptr, false);
- old_rzsize = u2rz(old_size);
+ old_usize = isalloc(ptr, false);
+ old_rzsize = u2rz(old_usize);
}
if (config_prof && opt_prof) {
usize = s2u(size);
@@ -1236,16 +1236,17 @@ label_oom:
label_return:
if (config_prof && opt_prof)
- prof_realloc(ret, usize, cnt, old_size, old_ctx);
+ prof_realloc(ret, usize, cnt, old_usize, old_ctx);
if (config_stats && ret != NULL) {
thread_allocated_t *ta;
assert(usize == isalloc(ret, config_prof));
ta = thread_allocated_tsd_get();
ta->allocated += usize;
- ta->deallocated += old_size;
+ ta->deallocated += old_usize;
}
UTRACE(ptr, size, ret);
- JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false);
+ JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_usize, old_rzsize,
+ false);
return (ret);
}
@@ -1431,8 +1432,7 @@ void *
je_rallocx(void *ptr, size_t size, int flags)
{
void *p;
- size_t usize;
- size_t old_size;
+ size_t usize, old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
& (SIZE_T_MAX-1));
@@ -1465,7 +1465,7 @@ je_rallocx(void *ptr, size_t size, int flags)
usize = (alignment == 0) ? s2u(size) : sa2u(size,
alignment);
prof_ctx_t *old_ctx = prof_ctx_get(ptr);
- old_size = isalloc(ptr, true);
+ old_usize = isalloc(ptr, true);
if (config_valgrind && opt_valgrind)
old_rzsize = p2rz(ptr);
PROF_ALLOC_PREP(1, usize, cnt);
@@ -1487,15 +1487,28 @@ je_rallocx(void *ptr, size_t size, int flags)
if (p == NULL)
goto label_oom;
}
- prof_realloc(p, usize, cnt, old_size, old_ctx);
+ if (p == ptr && alignment != 0) {
+ /*
+ * The allocation did not move, so it is possible that
+ * the size class is smaller than would guarantee the
+ * requested alignment, and that the alignment
+ * constraint was serendipitously satisfied.
+ * Additionally, old_usize may not be the same as the
+ * current usize because of in-place large
+ * reallocation. Therefore, query the actual value of
+ * usize.
+ */
+ usize = isalloc(p, true);
+ }
+ prof_realloc(p, usize, cnt, old_usize, old_ctx);
} else {
if (config_stats) {
- old_size = isalloc(ptr, false);
+ old_usize = isalloc(ptr, false);
if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_size);
+ old_rzsize = u2rz(old_usize);
} else if (config_valgrind && opt_valgrind) {
- old_size = isalloc(ptr, false);
- old_rzsize = u2rz(old_size);
+ old_usize = isalloc(ptr, false);
+ old_rzsize = u2rz(old_usize);
}
p = iralloct(ptr, size, 0, alignment, zero, false,
try_tcache_alloc, try_tcache_dalloc, arena);
@@ -1509,10 +1522,10 @@ je_rallocx(void *ptr, size_t size, int flags)
thread_allocated_t *ta;
ta = thread_allocated_tsd_get();
ta->allocated += usize;
- ta->deallocated += old_size;
+ ta->deallocated += old_usize;
}
UTRACE(ptr, size, p);
- JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_size, old_rzsize, zero);
+ JEMALLOC_VALGRIND_REALLOC(p, usize, ptr, old_usize, old_rzsize, zero);
return (p);
label_oom:
if (config_xmalloc && opt_xmalloc) {
@@ -1526,8 +1539,7 @@ label_oom:
size_t
je_xallocx(void *ptr, size_t size, size_t extra, int flags)
{
- size_t usize;
- size_t old_size;
+ size_t usize, old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
& (SIZE_T_MAX-1));
@@ -1568,12 +1580,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment);
prof_ctx_t *old_ctx = prof_ctx_get(ptr);
- old_size = isalloc(ptr, true);
+ old_usize = isalloc(ptr, true);
if (config_valgrind && opt_valgrind)
old_rzsize = p2rz(ptr);
PROF_ALLOC_PREP(1, max_usize, cnt);
if (cnt == NULL) {
- usize = isalloc(ptr, config_prof);
+ usize = old_usize;
goto label_not_moved;
}
/*
@@ -1585,32 +1597,35 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
if (iralloct(ptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
alignment, zero, true, try_tcache_alloc,
- try_tcache_dalloc, arena) == NULL)
+ try_tcache_dalloc, arena) == NULL) {
+ usize = old_usize;
goto label_not_moved;
- if (max_usize < PAGE) {
- usize = max_usize;
+ }
+ usize = isalloc(ptr, true);
+ if (max_usize < PAGE)
arena_prof_promoted(ptr, usize);
- } else
- usize = isalloc(ptr, config_prof);
} else {
if (iralloct(ptr, size, extra, alignment, zero, true,
- try_tcache_alloc, try_tcache_dalloc, arena) == NULL)
+ try_tcache_alloc, try_tcache_dalloc, arena) ==
+ NULL) {
+ usize = old_usize;
goto label_not_moved;
- usize = isalloc(ptr, config_prof);
+ }
+ usize = isalloc(ptr, true);
}
- prof_realloc(ptr, usize, cnt, old_size, old_ctx);
+ prof_realloc(ptr, usize, cnt, old_usize, old_ctx);
} else {
if (config_stats) {
- old_size = isalloc(ptr, false);
+ old_usize = isalloc(ptr, false);
if (config_valgrind && opt_valgrind)
- old_rzsize = u2rz(old_size);
+ old_rzsize = u2rz(old_usize);
} else if (config_valgrind && opt_valgrind) {
- old_size = isalloc(ptr, false);
- old_rzsize = u2rz(old_size);
+ old_usize = isalloc(ptr, false);
+ old_rzsize = u2rz(old_usize);
}
if (iralloct(ptr, size, extra, alignment, zero, true,
try_tcache_alloc, try_tcache_dalloc, arena) == NULL) {
- usize = isalloc(ptr, config_prof);
+ usize = old_usize;
goto label_not_moved;
}
usize = isalloc(ptr, config_prof);
@@ -1620,9 +1635,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
thread_allocated_t *ta;
ta = thread_allocated_tsd_get();
ta->allocated += usize;
- ta->deallocated += old_size;
+ ta->deallocated += old_usize;
}
- JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_size, old_rzsize, zero);
+ JEMALLOC_VALGRIND_REALLOC(ptr, usize, ptr, old_usize, old_rzsize, zero);
label_not_moved:
UTRACE(ptr, size, ptr);
return (usize);