path: root/include/jemalloc/internal/prof.h
author     Jason Evans <jasone@canonware.com>  2012-05-12 00:48:33 (GMT)
committer  Jason Evans <jasone@canonware.com>  2012-05-12 00:48:33 (GMT)
commit     fc9b1dbf69f59d7ecfc4ac68da9847e017e1d046 (patch)
tree       7f843c7c51cd5df5d3be1ca48f504325f0536c0d /include/jemalloc/internal/prof.h
parent     fc1bb70e5f0d9a58b39efa39cc549b5af5104760 (diff)
parent     cbb71caceb1e53d0fd21284ce298885327c211b4 (diff)
Merge branch 'dev' (tag: 3.0.0)

Conflicts:
	ChangeLog
	include/jemalloc/internal/chunk.h
	src/chunk.c
	src/huge.c
	src/jemalloc.c
	test/rallocm.c
Diffstat (limited to 'include/jemalloc/internal/prof.h')
-rw-r--r--  include/jemalloc/internal/prof.h  177
1 file changed, 104 insertions(+), 73 deletions(-)
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index e9064ba..c3e3f9e 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -1,4 +1,3 @@
-#ifdef JEMALLOC_PROF
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
@@ -10,28 +9,41 @@ typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */
#define PROF_PREFIX_DEFAULT "jeprof"
-#define LG_PROF_BT_MAX_DEFAULT 7
-#define LG_PROF_SAMPLE_DEFAULT 0
+#define LG_PROF_SAMPLE_DEFAULT 19
#define LG_PROF_INTERVAL_DEFAULT -1
-#define LG_PROF_TCMAX_DEFAULT -1
/*
- * Hard limit on stack backtrace depth. Note that the version of
- * prof_backtrace() that is based on __builtin_return_address() necessarily has
- * a hard-coded number of backtrace frame handlers.
+ * Hard limit on stack backtrace depth. The version of prof_backtrace() that
+ * is based on __builtin_return_address() necessarily has a hard-coded number
+ * of backtrace frame handlers, and should be kept in sync with this setting.
*/
-#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
-# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
-#else
-# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
-#endif
-#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
+#define PROF_BT_MAX 128
+
+/* Maximum number of backtraces to store in each per thread LRU cache. */
+#define PROF_TCMAX 1024
/* Initial hash table size. */
-#define PROF_CKH_MINITEMS 64
+#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
-#define PROF_DUMP_BUF_SIZE 65536
+#define PROF_DUMP_BUFSIZE 65536
+
+/* Size of stack-allocated buffer used by prof_printf(). */
+#define PROF_PRINTF_BUFSIZE 128
+
+/*
+ * Number of mutexes shared among all ctx's. No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NCTX_LOCKS 1024
+
+/*
+ * prof_tdata pointers close to NULL are used to encode state information that
+ * is used for cleaning up during thread shutdown.
+ */
+#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
+#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
+#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
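
The PROF_TDATA_STATE_* values introduced above rely on the usual near-NULL sentinel trick: addresses 1 and 2 can safely be assumed never to be live heap pointers, so a single TSD slot can hold either a real prof_tdata_t * or a small shutdown-state code. A minimal stand-alone sketch of the test this enables (names mirror the macros above; this is illustration, not jemalloc source):

#include <stdbool.h>
#include <stdint.h>

typedef struct prof_tdata_s prof_tdata_t;

#define PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
#define PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
#define PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY

/* NULL (0) and both sentinels compare <= STATE_MAX; live pointers do not. */
static bool
prof_tdata_is_live(prof_tdata_t *tdata)
{
	return ((uintptr_t)tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
}

This is exactly the comparison the new PROF_ALLOC_PREP() and prof_tdata_get() code further down performs before dereferencing.
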
@@ -109,8 +121,18 @@ struct prof_ctx_s {
/* Associated backtrace. */
prof_bt_t *bt;
- /* Protects cnt_merged and cnts_ql. */
- malloc_mutex_t lock;
+ /* Protects nlimbo, cnt_merged, and cnts_ql. */
+ malloc_mutex_t *lock;
+
+ /*
+ * Number of threads that currently cause this ctx to be in a state of
+ * limbo due to one of:
+ * - Initializing per thread counters associated with this ctx.
+ * - Preparing to destroy this ctx.
+ * nlimbo must be 1 (single destroyer) in order to safely destroy the
+ * ctx.
+ */
+ unsigned nlimbo;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
@@ -145,9 +167,14 @@ struct prof_tdata_s {
void **vec;
/* Sampling state. */
- uint64_t prn_state;
+ uint64_t prng_state;
uint64_t threshold;
uint64_t accum;
+
+ /* State used to avoid dumping while operating on prof internals. */
+ bool enq;
+ bool enq_idump;
+ bool enq_gdump;
};
#endif /* JEMALLOC_H_STRUCTS */
@@ -162,13 +189,12 @@ extern bool opt_prof;
* to notice state changes.
*/
extern bool opt_prof_active;
-extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_gdump; /* High-water memory dumping. */
+extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
-extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread backtrace cache) */
extern char opt_prof_prefix[PATH_MAX + 1];
/*
@@ -186,39 +212,14 @@ extern uint64_t prof_interval;
*/
extern bool prof_promote;
-/* (1U << opt_lg_prof_bt_max). */
-extern unsigned prof_bt_max;
-
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-#ifndef NO_TLS
-extern __thread prof_tdata_t *prof_tdata_tls
- JEMALLOC_ATTR(tls_model("initial-exec"));
-# define PROF_TCACHE_GET() prof_tdata_tls
-# define PROF_TCACHE_SET(v) do { \
- prof_tdata_tls = (v); \
- pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
-} while (0)
-#else
-# define PROF_TCACHE_GET() \
- ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
-# define PROF_TCACHE_SET(v) do { \
- pthread_setspecific(prof_tdata_tsd, (void *)(v)); \
-} while (0)
-#endif
-/*
- * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
- * called when a thread exits, so that prof_tdata_tls contents can be merged,
- * unlinked, and deallocated.
- */
-extern pthread_key_t prof_tdata_tsd;
-
void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+void prof_backtrace(prof_bt_t *bt, unsigned nignore);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt);
void prof_idump(void);
bool prof_mdump(const char *filename);
void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void);
+void prof_tdata_cleanup(void *arg);
void prof_boot0(void);
void prof_boot1(void);
bool prof_boot2(void);
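
The block removed above is the classic two-path TSD pattern that the malloc_tsd_* macros (added near the bottom of this diff) now generate: a __thread variable provides fast access, while a pthread key holding the same pointer exists only so that a destructor runs at thread exit to merge and free the per-thread state. A minimal sketch of that pattern under illustrative example_* names (not jemalloc's API):

#include <pthread.h>

static pthread_key_t example_tsd;
#ifndef NO_TLS
static __thread void *example_tls;		/* fast-path storage */
#  define EXAMPLE_GET()		example_tls
#  define EXAMPLE_SET(v)	do {					\
	example_tls = (v);			/* fast reads */	\
	pthread_setspecific(example_tsd, (v));	/* arm destructor */	\
} while (0)
#else
#  define EXAMPLE_GET()		pthread_getspecific(example_tsd)
#  define EXAMPLE_SET(v)	do {					\
	pthread_setspecific(example_tsd, (v));				\
} while (0)
#endif

static void
example_cleanup(void *arg)
{
	/* Merge, unlink, and deallocate the per-thread state here. */
	(void)arg;
}

static void
example_boot(void)
{
	pthread_key_create(&example_tsd, example_cleanup);
}
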
@@ -233,13 +234,13 @@ bool prof_boot2(void);
\
assert(size == s2u(size)); \
\
- prof_tdata = PROF_TCACHE_GET(); \
- if (prof_tdata == NULL) { \
- prof_tdata = prof_tdata_init(); \
- if (prof_tdata == NULL) { \
+ prof_tdata = prof_tdata_get(); \
+ if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) { \
+ if (prof_tdata != NULL) \
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
+ else \
ret = NULL; \
- break; \
- } \
+ break; \
} \
\
if (opt_prof_active == false) { \
@@ -249,13 +250,13 @@ bool prof_boot2(void);
/* Don't bother with sampling logic, since sampling */\
/* interval is 1. */\
bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore, prof_bt_max); \
+ prof_backtrace(&bt, nignore); \
ret = prof_lookup(&bt); \
} else { \
if (prof_tdata->threshold == 0) { \
/* Initialize. Seed the prng differently for */\
/* each thread. */\
- prof_tdata->prn_state = \
+ prof_tdata->prng_state = \
(uint64_t)(uintptr_t)&size; \
prof_sample_threshold_update(prof_tdata); \
} \
@@ -272,7 +273,7 @@ bool prof_boot2(void);
if (size >= prof_tdata->threshold - \
prof_tdata->accum) { \
bt_init(&bt, prof_tdata->vec); \
- prof_backtrace(&bt, nignore, prof_bt_max); \
+ prof_backtrace(&bt, nignore); \
ret = prof_lookup(&bt); \
} else \
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
@@ -280,6 +281,9 @@ bool prof_boot2(void);
} while (0)
#ifndef JEMALLOC_ENABLE_INLINE
+malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
+
+prof_tdata_t *prof_tdata_get(void);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata);
prof_ctx_t *prof_ctx_get(const void *ptr);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
@@ -291,12 +295,35 @@ void prof_free(const void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+malloc_tsd_externs(prof_tdata, prof_tdata_t *)
+malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
+ prof_tdata_cleanup)
+
+JEMALLOC_INLINE prof_tdata_t *
+prof_tdata_get(void)
+{
+ prof_tdata_t *prof_tdata;
+
+ cassert(config_prof);
+
+ prof_tdata = *prof_tdata_tsd_get();
+ if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
+ if (prof_tdata == NULL)
+ prof_tdata = prof_tdata_init();
+ }
+
+ return (prof_tdata);
+}
+
JEMALLOC_INLINE void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
uint64_t r;
double u;
+ cassert(config_prof);
+
/*
* Compute sample threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample).
@@ -315,8 +342,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
* pp 500
* (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
*/
- prn64(r, 53, prof_tdata->prn_state,
- (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
+ prng64(r, 53, prof_tdata->prng_state,
+ UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
u = (double)r * (1.0/9007199254740992.0L);
prof_tdata->threshold = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
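
The computation above is inverse-transform sampling of a geometric distribution: with p = 2^-opt_lg_prof_sample, a threshold of ceil(log(u)/log(1-p)) bytes has mean 1/p = 2^opt_lg_prof_sample, about 512 KiB between samples at the new default of 19. A stand-alone sketch of the same computation, substituting drand48() for jemalloc's prng64() (an assumption made for portability):

#include <math.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t
sample_threshold(unsigned lg_sample)
{
	double u = 1.0 - drand48();	/* uniform in (0, 1]; avoids log(0) */
	double p = 1.0 / (double)((uint64_t)1 << lg_sample);

	/* Geometric variate via inversion; mean is 1/p == 2^lg_sample. */
	return ((uint64_t)(log(u) / log(1.0 - p)) + 1);
}
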
@@ -329,13 +356,12 @@ prof_ctx_get(const void *ptr)
prof_ctx_t *ret;
arena_chunk_t *chunk;
+ cassert(config_prof);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- dassert(chunk->arena->magic == ARENA_MAGIC);
-
ret = arena_prof_ctx_get(ptr);
} else
ret = huge_prof_ctx_get(ptr);
@@ -348,13 +374,12 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
{
arena_chunk_t *chunk;
+ cassert(config_prof);
assert(ptr != NULL);
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
- dassert(chunk->arena->magic == ARENA_MAGIC);
-
arena_prof_ctx_set(ptr, ctx);
} else
huge_prof_ctx_set(ptr, ctx);
@@ -365,11 +390,13 @@ prof_sample_accum_update(size_t size)
{
prof_tdata_t *prof_tdata;
+ cassert(config_prof);
/* Sampling logic is unnecessary if the interval is 1. */
assert(opt_lg_prof_sample != 0);
- prof_tdata = PROF_TCACHE_GET();
- assert(prof_tdata != NULL);
+ prof_tdata = *prof_tdata_tsd_get();
+ if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+ return (true);
/* Take care to avoid integer overflow. */
if (size >= prof_tdata->threshold - prof_tdata->accum) {
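
The comparison above is deliberately written backwards to dodge wraparound: accum + size could overflow a uint64_t, but threshold - accum cannot underflow while the invariant accum < threshold holds, so testing size against the remaining headroom is safe. Spelled out as a helper (illustrative only):

#include <stdbool.h>
#include <stdint.h>

/*
 * Equivalent to accum + size >= threshold given accum < threshold, but
 * immune to accum + size wrapping around uint64_t.
 */
static bool
sample_would_trigger(uint64_t accum, uint64_t threshold, uint64_t size)
{
	return (size >= threshold - accum);
}
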
@@ -391,8 +418,9 @@ JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{
+ cassert(config_prof);
assert(ptr != NULL);
- assert(size == isalloc(ptr));
+ assert(size == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
@@ -437,10 +465,11 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
{
prof_thr_cnt_t *told_cnt;
+ cassert(config_prof);
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) {
- assert(size == isalloc(ptr));
+ assert(size == isalloc(ptr, true));
if (opt_lg_prof_sample != 0) {
if (prof_sample_accum_update(size)) {
/*
@@ -463,10 +492,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
* It's too late to propagate OOM for this realloc(),
* so operate directly on old_cnt->ctx->cnt_merged.
*/
- malloc_mutex_lock(&old_ctx->lock);
+ malloc_mutex_lock(old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
old_ctx->cnt_merged.curbytes -= old_size;
- malloc_mutex_unlock(&old_ctx->lock);
+ malloc_mutex_unlock(old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
} else
@@ -510,9 +539,12 @@ prof_free(const void *ptr, size_t size)
{
prof_ctx_t *ctx = prof_ctx_get(ptr);
+ cassert(config_prof);
+
if ((uintptr_t)ctx > (uintptr_t)1) {
- assert(size == isalloc(ptr));
- prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+ prof_thr_cnt_t *tcnt;
+ assert(size == isalloc(ptr, true));
+ tcnt = prof_lookup(ctx->bt);
if (tcnt != NULL) {
tcnt->epoch++;
@@ -533,10 +565,10 @@ prof_free(const void *ptr, size_t size)
* OOM during free() cannot be propagated, so operate
* directly on cnt->ctx->cnt_merged.
*/
- malloc_mutex_lock(&ctx->lock);
+ malloc_mutex_lock(ctx->lock);
ctx->cnt_merged.curobjs--;
ctx->cnt_merged.curbytes -= size;
- malloc_mutex_unlock(&ctx->lock);
+ malloc_mutex_unlock(ctx->lock);
}
}
}
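
One more pattern threaded through this diff is worth spelling out: prof_ctx_s now stores a malloc_mutex_t *lock instead of an embedded mutex, and the PROF_NCTX_LOCKS comment near the top explains why — ctx's borrow from a fixed pool of 1024 shared mutexes (lock striping), keeping each ctx small while over-provisioning the pool stays cheap. A sketch of the idea using pthreads; the round-robin stripe choice is an assumption for illustration, not necessarily jemalloc's exact policy:

#include <pthread.h>

#define NCTX_LOCKS 1024			/* mirrors PROF_NCTX_LOCKS */

static pthread_mutex_t ctx_locks[NCTX_LOCKS];

static void
ctx_locks_boot(void)
{
	for (unsigned i = 0; i < NCTX_LOCKS; i++)
		pthread_mutex_init(&ctx_locks[i], NULL);
}

/*
 * Hand each new ctx one of the shared stripes. An unsynchronized
 * counter is tolerable here: any stripe is correct, the counter only
 * spreads contention.
 */
static pthread_mutex_t *
ctx_lock_choose(void)
{
	static unsigned next;

	return (&ctx_locks[next++ % NCTX_LOCKS]);
}
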
@@ -544,4 +576,3 @@ prof_free(const void *ptr, size_t size)
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
-#endif /* JEMALLOC_PROF */