summary | refs | log | tree | commit | diff | stats
path: root/jemalloc
diff options
context:
space:
mode:
author: Jason Evans <je@facebook.com> 2010-04-13 23:13:54 (GMT)
committer: Jason Evans <je@facebook.com> 2010-04-14 04:17:11 (GMT)
commit: 5065156f3f90e421ba2b1a914e47eeb30d83d994 (patch)
tree: 252067a09dea4a435c5ae5d5bd6d3d50c11415d4 /jemalloc
parent: 1bb602125cc6ea471ecc305dd61849ef60349091 (diff)
download: jemalloc-5065156f3f90e421ba2b1a914e47eeb30d83d994.zip
download: jemalloc-5065156f3f90e421ba2b1a914e47eeb30d83d994.tar.gz
download: jemalloc-5065156f3f90e421ba2b1a914e47eeb30d83d994.tar.bz2
Fix threads-related profiling bugs.
Initialize bt2cnt_tsd so that cleanup at thread exit actually happens. Associate (prof_ctx_t *) with allocated objects, rather than (prof_thr_cnt_t *). Each thread must always operate on its own (prof_thr_cnt_t *), and an object may outlive the thread that allocated it.
Diffstat (limited to 'jemalloc')
-rw-r--r-- jemalloc/include/jemalloc/internal/arena.h | 10
-rw-r--r-- jemalloc/include/jemalloc/internal/extent.h | 2
-rw-r--r-- jemalloc/include/jemalloc/internal/huge.h | 4
-rw-r--r-- jemalloc/include/jemalloc/internal/prof.h | 7
-rw-r--r-- jemalloc/src/arena.c | 47
-rw-r--r-- jemalloc/src/huge.c | 12
-rw-r--r-- jemalloc/src/jemalloc.c | 13
-rw-r--r-- jemalloc/src/prof.c | 105
8 files changed, 118 insertions, 82 deletions
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index bb4ce2a..c1955f1 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -98,7 +98,7 @@ struct arena_chunk_map_s {
#ifdef JEMALLOC_PROF
/* Profile counters, used for large object runs. */
- prof_thr_cnt_t *prof_cnt;
+ prof_ctx_t *prof_ctx;
#endif
/*
@@ -246,10 +246,10 @@ struct arena_bin_s {
#ifdef JEMALLOC_PROF
/*
- * Offset of first (prof_cnt_t *) in a run header for this bin's size
+ * Offset of first (prof_ctx_t *) in a run header for this bin's size
* class, or 0 if (opt_prof == false).
*/
- uint32_t cnt0_offset;
+ uint32_t ctx0_offset;
#endif
/* Offset of first region in a run for this bin's size class. */
@@ -438,8 +438,8 @@ size_t arena_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
void arena_prof_promoted(const void *ptr, size_t size);
size_t arena_salloc_demote(const void *ptr);
-prof_thr_cnt_t *arena_prof_cnt_get(const void *ptr);
-void arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+prof_ctx_t *arena_prof_ctx_get(const void *ptr);
+void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
arena_chunk_map_t *mapelm);
diff --git a/jemalloc/include/jemalloc/internal/extent.h b/jemalloc/include/jemalloc/internal/extent.h
index 33a4e9a..6fe9702 100644
--- a/jemalloc/include/jemalloc/internal/extent.h
+++ b/jemalloc/include/jemalloc/internal/extent.h
@@ -19,7 +19,7 @@ struct extent_node_s {
#ifdef JEMALLOC_PROF
/* Profile counters, used for huge objects. */
- prof_thr_cnt_t *prof_cnt;
+ prof_ctx_t *prof_ctx;
#endif
/* Pointer to the extent that this tree node is responsible for. */
diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h
index 3cf32f7..0c0582f 100644
--- a/jemalloc/include/jemalloc/internal/huge.h
+++ b/jemalloc/include/jemalloc/internal/huge.h
@@ -25,8 +25,8 @@ void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
void huge_dalloc(void *ptr);
size_t huge_salloc(const void *ptr);
#ifdef JEMALLOC_PROF
-prof_thr_cnt_t *huge_prof_cnt_get(const void *ptr);
-void huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+prof_ctx_t *huge_prof_ctx_get(const void *ptr);
+void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
#endif
bool huge_boot(void);
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 6e71552..fb55fb9 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -98,6 +98,9 @@ struct prof_thr_cnt_s {
};
struct prof_ctx_s {
+ /* Associated backtrace. */
+ prof_bt_t *bt;
+
/* Protects cnt_merged and sets_ql. */
malloc_mutex_t lock;
@@ -151,10 +154,10 @@ bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof);
prof_thr_cnt_t *prof_alloc_prep(size_t size);
-prof_thr_cnt_t *prof_cnt_get(const void *ptr);
+prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_thr_cnt_t *old_cnt);
+ size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr);
void prof_idump(void);
bool prof_mdump(const char *filename);
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index e74b470..222ec25 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -1198,7 +1198,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
uint32_t try_nregs, good_nregs;
uint32_t try_hdr_size, good_hdr_size;
#ifdef JEMALLOC_PROF
- uint32_t try_cnt0_offset, good_cnt0_offset;
+ uint32_t try_ctx0_offset, good_ctx0_offset;
#endif
uint32_t try_reg0_offset, good_reg0_offset;
@@ -1225,11 +1225,11 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_cnt0_offset = try_hdr_size;
- /* Add space for one (prof_thr_cnt_t *) per region. */
- try_hdr_size += try_nregs * sizeof(prof_thr_cnt_t *);
+ try_ctx0_offset = try_hdr_size;
+ /* Add space for one (prof_ctx_t *) per region. */
+ try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
} else
- try_cnt0_offset = 0;
+ try_ctx0_offset = 0;
#endif
try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
} while (try_hdr_size > try_reg0_offset);
@@ -1243,7 +1243,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
good_nregs = try_nregs;
good_hdr_size = try_hdr_size;
#ifdef JEMALLOC_PROF
- good_cnt0_offset = try_cnt0_offset;
+ good_ctx0_offset = try_ctx0_offset;
#endif
good_reg0_offset = try_reg0_offset;
@@ -1258,13 +1258,12 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
if (opt_prof && prof_promote == false) {
/* Pad to a quantum boundary. */
try_hdr_size = QUANTUM_CEILING(try_hdr_size);
- try_cnt0_offset = try_hdr_size;
+ try_ctx0_offset = try_hdr_size;
/*
- * Add space for one (prof_thr_cnt_t *) per
- * region.
+ * Add space for one (prof_ctx_t *) per region.
*/
try_hdr_size += try_nregs *
- sizeof(prof_thr_cnt_t *);
+ sizeof(prof_ctx_t *);
}
#endif
try_reg0_offset = try_run_size - (try_nregs *
@@ -1282,7 +1281,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
bin->run_size = good_run_size;
bin->nregs = good_nregs;
#ifdef JEMALLOC_PROF
- bin->cnt0_offset = good_cnt0_offset;
+ bin->ctx0_offset = good_ctx0_offset;
#endif
bin->reg0_offset = good_reg0_offset;
@@ -1639,10 +1638,10 @@ arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr,
return (regind);
}
-prof_thr_cnt_t *
-arena_prof_cnt_get(const void *ptr)
+prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
{
- prof_thr_cnt_t *ret;
+ prof_ctx_t *ret;
arena_chunk_t *chunk;
size_t pageind, mapbits;
@@ -1655,7 +1654,7 @@ arena_prof_cnt_get(const void *ptr)
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
if (prof_promote)
- ret = (prof_thr_cnt_t *)(uintptr_t)1U;
+ ret = (prof_ctx_t *)(uintptr_t)1U;
else {
arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
(uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
@@ -1665,18 +1664,18 @@ arena_prof_cnt_get(const void *ptr)
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
- ret = *(prof_thr_cnt_t **)((uintptr_t)run +
- bin->cnt0_offset + (regind *
- sizeof(prof_thr_cnt_t *)));
+ ret = *(prof_ctx_t **)((uintptr_t)run +
+ bin->ctx0_offset + (regind *
+ sizeof(prof_ctx_t *)));
}
} else
- ret = chunk->map[pageind].prof_cnt;
+ ret = chunk->map[pageind].prof_ctx;
return (ret);
}
void
-arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
size_t pageind, mapbits;
@@ -1699,12 +1698,12 @@ arena_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
assert(run->magic == ARENA_RUN_MAGIC);
regind = arena_run_regind(run, bin, ptr, bin->reg_size);
- *((prof_thr_cnt_t **)((uintptr_t)run + bin->cnt0_offset
- + (regind * sizeof(prof_thr_cnt_t *)))) = cnt;
+ *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset
+ + (regind * sizeof(prof_ctx_t *)))) = ctx;
} else
- assert((uintptr_t)cnt == (uintptr_t)1U);
+ assert((uintptr_t)ctx == (uintptr_t)1U);
} else
- chunk->map[pageind].prof_cnt = cnt;
+ chunk->map[pageind].prof_ctx = ctx;
}
#endif
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index d35aa5c..49962ea 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -241,10 +241,10 @@ huge_salloc(const void *ptr)
}
#ifdef JEMALLOC_PROF
-prof_thr_cnt_t *
-huge_prof_cnt_get(const void *ptr)
+prof_ctx_t *
+huge_prof_ctx_get(const void *ptr)
{
- prof_thr_cnt_t *ret;
+ prof_ctx_t *ret;
extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx);
@@ -254,7 +254,7 @@ huge_prof_cnt_get(const void *ptr)
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
- ret = node->prof_cnt;
+ ret = node->prof_ctx;
malloc_mutex_unlock(&huge_mtx);
@@ -262,7 +262,7 @@ huge_prof_cnt_get(const void *ptr)
}
void
-huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
extent_node_t *node, key;
@@ -273,7 +273,7 @@ huge_prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
node = extent_tree_ad_search(&huge, &key);
assert(node != NULL);
- node->prof_cnt = cnt;
+ node->prof_ctx = ctx;
malloc_mutex_unlock(&huge_mtx);
}
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index e01de0d..aeab140 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -1060,7 +1060,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
void *ret;
#ifdef JEMALLOC_PROF
size_t old_size;
- prof_thr_cnt_t *cnt, *old_cnt;
+ prof_thr_cnt_t *cnt;
+ prof_ctx_t *old_ctx;
#endif
if (size == 0) {
@@ -1074,7 +1075,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = isalloc(ptr);
- old_cnt = prof_cnt_get(ptr);
+ old_ctx = prof_ctx_get(ptr);
cnt = NULL;
}
#endif
@@ -1083,7 +1084,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
else if (opt_prof) {
old_size = 0;
- old_cnt = NULL;
+ old_ctx = NULL;
cnt = NULL;
}
#endif
@@ -1100,7 +1101,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = isalloc(ptr);
- old_cnt = prof_cnt_get(ptr);
+ old_ctx = prof_ctx_get(ptr);
if ((cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL;
goto OOM;
@@ -1133,7 +1134,7 @@ OOM:
#ifdef JEMALLOC_PROF
if (opt_prof) {
old_size = 0;
- old_cnt = NULL;
+ old_ctx = NULL;
}
#endif
if (malloc_init()) {
@@ -1181,7 +1182,7 @@ RETURN:
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
- prof_realloc(ret, cnt, ptr, old_size, old_cnt);
+ prof_realloc(ret, cnt, ptr, old_size, old_ctx);
#endif
return (ret);
}
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 6326188..c13bc04 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -48,7 +48,7 @@ static malloc_mutex_t bt2ctx_mtx;
static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
/*
- * Same contents as b2cnt, but initialized such that the TSD destructor is
+ * Same contents as bt2cnt_tls, but initialized such that the TSD destructor is
* called when a thread exits, so that bt2cnt_tls contents can be merged,
* unlinked, and deallocated.
*/
@@ -100,7 +100,7 @@ static _Unwind_Reason_Code prof_unwind_callback(
#endif
static void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
static prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
-static void prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt);
+static void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
static bool prof_flush(bool propagate_err);
static bool prof_write(const char *s, bool propagate_err);
static void prof_ctx_merge(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
@@ -450,6 +450,7 @@ prof_lookup(prof_bt_t *bt)
return (NULL);
}
bt2cnt_tls = bt2cnt;
+ pthread_setspecific(bt2cnt_tsd, bt2cnt);
}
if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
@@ -475,6 +476,7 @@ prof_lookup(prof_bt_t *bt)
idalloc(ctx);
return (NULL);
}
+ ctx->bt = btkey;
if (malloc_mutex_init(&ctx->lock)) {
prof_leave();
idalloc(btkey);
@@ -580,10 +582,10 @@ prof_alloc_prep(size_t size)
return (ret);
}
-prof_thr_cnt_t *
-prof_cnt_get(const void *ptr)
+prof_ctx_t *
+prof_ctx_get(const void *ptr)
{
- prof_thr_cnt_t *ret;
+ prof_ctx_t *ret;
arena_chunk_t *chunk;
assert(ptr != NULL);
@@ -593,15 +595,15 @@ prof_cnt_get(const void *ptr)
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
- ret = arena_prof_cnt_get(ptr);
+ ret = arena_prof_ctx_get(ptr);
} else
- ret = huge_prof_cnt_get(ptr);
+ ret = huge_prof_ctx_get(ptr);
return (ret);
}
static void
-prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
@@ -612,9 +614,9 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
/* Region. */
assert(chunk->arena->magic == ARENA_MAGIC);
- arena_prof_cnt_set(ptr, cnt);
+ arena_prof_ctx_set(ptr, ctx);
} else
- huge_prof_cnt_set(ptr, cnt);
+ huge_prof_ctx_set(ptr, ctx);
}
static inline void
@@ -649,7 +651,7 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
assert(ptr != NULL);
- prof_cnt_set(ptr, cnt);
+ prof_ctx_set(ptr, cnt->ctx);
prof_sample_accum_update(size);
if ((uintptr_t)cnt > (uintptr_t)1U) {
@@ -673,25 +675,43 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
void
prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_thr_cnt_t *old_cnt)
+ size_t old_size, prof_ctx_t *old_ctx)
{
size_t size = isalloc(ptr);
+ prof_thr_cnt_t *told_cnt;
if (ptr != NULL) {
- prof_cnt_set(ptr, cnt);
+ prof_ctx_set(ptr, cnt->ctx);
prof_sample_accum_update(size);
}
- if ((uintptr_t)old_cnt > (uintptr_t)1U)
- old_cnt->epoch++;
+ if ((uintptr_t)old_ctx > (uintptr_t)1U) {
+ told_cnt = prof_lookup(old_ctx->bt);
+ if (told_cnt == NULL) {
+ /*
+ * It's too late to propagate OOM for this realloc(),
+ * so operate directly on old_ctx->cnt_merged.
+ */
+ malloc_printf("XXX BANG A\n");
+ malloc_mutex_lock(&old_ctx->lock);
+ old_ctx->cnt_merged.curobjs--;
+ old_ctx->cnt_merged.curbytes -= old_size;
+ malloc_mutex_unlock(&old_ctx->lock);
+ told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+ }
+ } else
+ told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+
+ if ((uintptr_t)told_cnt > (uintptr_t)1U)
+ told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write();
/*********/
- if ((uintptr_t)old_cnt > (uintptr_t)1U) {
- old_cnt->cnts.curobjs--;
- old_cnt->cnts.curbytes -= old_size;
+ if ((uintptr_t)told_cnt > (uintptr_t)1U) {
+ told_cnt->cnts.curobjs--;
+ told_cnt->cnts.curbytes -= old_size;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
@@ -702,8 +722,8 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
/*********/
mb_write();
/*********/
- if ((uintptr_t)old_cnt > (uintptr_t)1U)
- old_cnt->epoch++;
+ if ((uintptr_t)told_cnt > (uintptr_t)1U)
+ told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
@@ -713,24 +733,37 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
void
prof_free(const void *ptr)
{
- prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
+ prof_ctx_t *ctx = prof_ctx_get(ptr);
- if ((uintptr_t)cnt > (uintptr_t)1) {
+ if ((uintptr_t)ctx > (uintptr_t)1) {
size_t size = isalloc(ptr);
-
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs--;
- cnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
+ prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+
+ if (tcnt != NULL) {
+ tcnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ tcnt->cnts.curobjs--;
+ tcnt->cnts.curbytes -= size;
+ /*********/
+ mb_write();
+ /*********/
+ tcnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ } else {
+ /*
+ * OOM during free() cannot be propagated, so operate
+ * directly on ctx->cnt_merged.
+ */
+ malloc_printf("XXX BANG B\n");
+ malloc_mutex_lock(&ctx->lock);
+ ctx->cnt_merged.curobjs--;
+ ctx->cnt_merged.curbytes -= size;
+ malloc_mutex_unlock(&ctx->lock);
+ }
}
}