summary | refs | log | tree | commit | diff | stats
path: root/jemalloc
diff options
context:
space:
mode:
author: Jason Evans <je@facebook.com> 2010-03-02 04:15:26 (GMT)
committer: Jason Evans <je@facebook.com> 2010-03-02 04:15:26 (GMT)
commit: b9477e782b07afa38c4b1dc0688e053be8a84dd8 (patch)
tree: 62d7eca6dfdd06e70a611d7a219abd8c43223330 /jemalloc
parent: f3ff75289be32382fa455b4436871e4958fe6bf9 (diff)
download: jemalloc-b9477e782b07afa38c4b1dc0688e053be8a84dd8.zip
jemalloc-b9477e782b07afa38c4b1dc0688e053be8a84dd8.tar.gz
jemalloc-b9477e782b07afa38c4b1dc0688e053be8a84dd8.tar.bz2
Implement sampling for heap profiling.
Diffstat (limited to 'jemalloc')
-rw-r--r-- jemalloc/configure.ac | 1
-rw-r--r-- jemalloc/doc/jemalloc.3.in | 14
-rw-r--r-- jemalloc/include/jemalloc/internal/prn.h | 12
-rw-r--r-- jemalloc/include/jemalloc/internal/prof.h | 10
-rw-r--r-- jemalloc/src/ckh.c | 4
-rw-r--r-- jemalloc/src/ctl.c | 3
-rw-r--r-- jemalloc/src/jemalloc.c | 19
-rw-r--r-- jemalloc/src/prof.c | 182
-rw-r--r-- jemalloc/src/stats.c | 15
9 files changed, 201 insertions, 59 deletions
diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac
index 5d03890..918be8b 100644
--- a/jemalloc/configure.ac
+++ b/jemalloc/configure.ac
@@ -647,6 +647,7 @@ if test "x$enable_tls" = "x0" ; then
enable_prof="0"
fi
if test "x$enable_prof" = "x1" ; then
+ LIBS="$LIBS -lm"
AC_DEFINE([JEMALLOC_PROF], [ ])
if test "x$enable_prof_libunwind" = "x1" ; then
AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 718ad84..6e55ca0 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -38,7 +38,7 @@
.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
.\"
-.Dd February 11, 2010
+.Dd March 1, 2010
.Dt JEMALLOC 3
.Os
.Sh NAME
@@ -355,6 +355,9 @@ will disable dirty page purging.
@roff_prof@.Dq B
@roff_prof@option for backtrace depth control.
@roff_prof@See the
+@roff_prof@.Dq S
+@roff_prof@option for probabilistic sampling control.
+@roff_prof@See the
@roff_prof@.Dq I
@roff_prof@option for information on interval-triggered profile dumping, and the
@roff_prof@.Dq U
@@ -464,6 +467,15 @@ Double/halve the size of the maximum size class that is a multiple of the
quantum (8 or 16 bytes, depending on architecture).
Above this size, cacheline spacing is used for size classes.
The default value is 128 bytes.
+@roff_prof@.It S
+@roff_prof@Double/halve the average interval between allocation samples, as
+@roff_prof@measured in bytes of allocation activity.
+@roff_prof@Increasing the sampling interval decreases profile fidelity, but
+@roff_prof@also decreases the computational overhead.
+@roff_prof@The default sample interval is one (i.e. all allocations are
+@roff_prof@sampled).
+@roff_prof@A sample interval greater than one implicitly disables leak
+@roff_prof@reporting.
@roff_prof@.It U
@roff_prof@Trigger a memory profile dump every time the total virtual memory
@roff_prof@exceeds the previous maximum.
diff --git a/jemalloc/include/jemalloc/internal/prn.h b/jemalloc/include/jemalloc/internal/prn.h
index 502733c..0709d70 100644
--- a/jemalloc/include/jemalloc/internal/prn.h
+++ b/jemalloc/include/jemalloc/internal/prn.h
@@ -25,7 +25,7 @@
* uint32_t state : Seed value.
* const uint32_t a, c : See above discussion.
*/
-#define prn(r, lg_range, state, a, c) do { \
+#define prn32(r, lg_range, state, a, c) do { \
assert(lg_range > 0); \
assert(lg_range <= 32); \
\
@@ -34,6 +34,16 @@
r >>= (32 - lg_range); \
} while (false)
+/*
+ * Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t.
+ *
+ * Advances state by one linear congruential step (state = state * a + c) and
+ * then shifts the result right by (64 - lg_range), so that, for uint64_t
+ * operands, r ends up holding the lg_range most significant bits of the new
+ * state.  lg_range must be in [1, 64] (asserted).
+ *
+ * r and state are assigned to and are expanded more than once, so both must
+ * be lvalues whose evaluation has no side effects.
+ */
+#define prn64(r, lg_range, state, a, c) do { \
+ assert(lg_range > 0); \
+ assert(lg_range <= 64); \
+ \
+ r = (state * (a)) + (c); \
+ state = r; \
+ r >>= (64 - lg_range); \
+} while (false)
+
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 44e11cb..364ac0a 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -8,6 +8,9 @@ typedef struct prof_thr_cnt_s prof_thr_cnt_t;
typedef struct prof_ctx_s prof_ctx_t;
typedef struct prof_s prof_t;
+/* Option defaults. */
+#define LG_PROF_BT_MAX_DEFAULT 2
+#define LG_PROF_SAMPLE_DEFAULT 0
#define LG_PROF_INTERVAL_DEFAULT 30
/*
@@ -16,7 +19,7 @@ typedef struct prof_s prof_t;
* a hard-coded number of backtrace frame handlers, so increasing
* LG_PROF_BT_MAX requires changing prof_backtrace().
*/
-#define LG_PROF_BT_MAX 7
+#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */
#define PROF_BT_MAX (1U << LG_PROF_BT_MAX)
/* Initial hash table size. */
@@ -117,7 +120,8 @@ struct prof_ctx_s {
extern bool opt_prof;
extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */
-extern size_t opt_lg_prof_interval;
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern size_t opt_lg_prof_interval; /* lg(prof_interval). */
extern bool opt_prof_udump; /* High-water memory dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
@@ -133,7 +137,7 @@ extern uint64_t prof_interval;
bool prof_init(prof_t *prof, bool master);
void prof_destroy(prof_t *prof);
-prof_thr_cnt_t *prof_alloc_prep(void);
+prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_thr_cnt_t *prof_cnt_get(const void *ptr);
void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c
index fd234a4..a0c4162 100644
--- a/jemalloc/src/ckh.c
+++ b/jemalloc/src/ckh.c
@@ -100,7 +100,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
- prn(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+ prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -142,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
* were an item for which both hashes indicated the same
* bucket.
*/
- prn(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+ prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 55ad2a7..36411e0 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -69,6 +69,7 @@ CTL_PROTO(opt_lg_tcache_gc_sweep)
#ifdef JEMALLOC_PROF
CTL_PROTO(opt_prof)
CTL_PROTO(opt_lg_prof_bt_max)
+CTL_PROTO(opt_lg_prof_sample)
CTL_PROTO(opt_lg_prof_interval)
CTL_PROTO(opt_prof_udump)
CTL_PROTO(opt_prof_leak)
@@ -234,6 +235,7 @@ static const ctl_node_t opt_node[] = {
#ifdef JEMALLOC_PROF
{NAME("prof"), CTL(opt_prof)},
{NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)},
+ {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)},
{NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)},
{NAME("prof_udump"), CTL(opt_prof_udump)},
{NAME("prof_leak"), CTL(opt_prof_leak)},
@@ -1066,6 +1068,7 @@ CTL_RO_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
#ifdef JEMALLOC_PROF
CTL_RO_GEN(opt_prof, opt_prof, bool)
CTL_RO_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
CTL_RO_GEN(opt_lg_prof_interval, opt_lg_prof_interval, size_t)
CTL_RO_GEN(opt_prof_udump, opt_prof_udump, bool)
CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 22401d1..4d51e1a 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -582,6 +582,15 @@ MALLOC_OUT:
opt_lg_qspace_max++;
break;
#ifdef JEMALLOC_PROF
+ case 's':
+ if (opt_lg_prof_sample > 0)
+ opt_lg_prof_sample--;
+ break;
+ case 'S':
+ if (opt_lg_prof_sample + 1 <
+ (sizeof(uint64_t) << 3))
+ opt_lg_prof_sample++;
+ break;
case 'u':
opt_prof_udump = false;
break;
@@ -870,7 +879,7 @@ JEMALLOC_P(malloc)(size_t size)
}
#ifdef JEMALLOC_PROF
- if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+ if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL;
goto OOM;
}
@@ -955,7 +964,7 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
}
#ifdef JEMALLOC_PROF
- if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+ if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
result = NULL;
ret = EINVAL;
} else
@@ -1030,7 +1039,7 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
}
#ifdef JEMALLOC_PROF
- if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+ if (opt_prof && (cnt = prof_alloc_prep(num_size)) == NULL) {
ret = NULL;
goto RETURN;
}
@@ -1106,7 +1115,7 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
if (opt_prof) {
old_size = isalloc(ptr);
old_cnt = prof_cnt_get(ptr);
- if ((cnt = prof_alloc_prep()) == NULL) {
+ if ((cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL;
goto OOM;
}
@@ -1144,7 +1153,7 @@ OOM:
ret = NULL;
} else {
#ifdef JEMALLOC_PROF
- if (opt_prof && (cnt = prof_alloc_prep()) == NULL) {
+ if (opt_prof && (cnt = prof_alloc_prep(size)) == NULL) {
ret = NULL;
} else
#endif
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index d1bb4d0..d44084f 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -12,11 +12,14 @@
#include <libunwind.h>
#endif
+#include <math.h>
+
/******************************************************************************/
/* Data. */
bool opt_prof = false;
-size_t opt_lg_prof_bt_max = 2;
+size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
+size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
size_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool opt_prof_udump = false;
bool opt_prof_leak = false;
@@ -52,6 +55,13 @@ static pthread_key_t bt2cnt_tsd;
/* (1U << opt_lg_prof_bt_max). */
static unsigned prof_bt_max;
+static __thread uint64_t prof_sample_prn_state
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_threshold
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+static __thread uint64_t prof_sample_accum
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+
static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
static uint64_t prof_dump_iseq;
@@ -500,15 +510,27 @@ prof_lookup(prof_bt_t *bt)
}
prof_thr_cnt_t *
-prof_alloc_prep(void)
+prof_alloc_prep(size_t size)
{
prof_thr_cnt_t *ret;
void *vec[prof_bt_max];
prof_bt_t bt;
- bt_init(&bt, vec);
- prof_backtrace(&bt, 2, prof_bt_max);
- ret = prof_lookup(&bt);
+ /*
+ * Determine whether to capture a backtrace based on whether size is
+ * enough for prof_sample_accum to reach prof_sample_threshold. However,
+ * delay updating these variables until prof_{m,re}alloc(), because we
+ * don't know for sure that the allocation will succeed.
+ *
+ * Use subtraction rather than addition to avoid potential integer
+ * overflow.
+ */
+ if (size >= prof_sample_threshold - prof_sample_accum) {
+ bt_init(&bt, vec);
+ prof_backtrace(&bt, 2, prof_bt_max);
+ ret = prof_lookup(&bt);
+ } else
+ ret = (prof_thr_cnt_t *)(uintptr_t)1U;
return (ret);
}
@@ -550,28 +572,84 @@ prof_cnt_set(const void *ptr, prof_thr_cnt_t *cnt)
huge_prof_cnt_set(ptr, cnt);
}
+/*
+ * Draw a fresh value for the calling thread's prof_sample_threshold.
+ *
+ * The threshold is a geometrically distributed random variable with mean
+ * (2^opt_lg_prof_sample), obtained by inverse transform sampling: with
+ * p = 1 / 2^opt_lg_prof_sample and u uniform on (0, 1],
+ *
+ *   threshold = floor(log(u) / log(1 - p)) + 1
+ *
+ * Advances prof_sample_prn_state as a side effect.  The result is always at
+ * least 1, so a threshold of 0 can only mean "not yet initialized".
+ */
+static inline void
+prof_sample_threshold_update(void)
+{
+	uint64_t r;
+	double u;
+
+	/*
+	 * Compute prof_sample_threshold as a geometrically distributed random
+	 * variable with mean (2^opt_lg_prof_sample).
+	 */
+	prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
+	    1058392653243283975);
+	/*
+	 * Scale the 53 random bits by 2^-53 to get u in [0, 1).  r == 0 would
+	 * give u == 0.0, for which log(u) is -infinity; the quotient below
+	 * would then be +infinity, and converting that to uint64_t is
+	 * undefined behavior.  Map that single value to u == 1.0 instead,
+	 * which yields the minimum threshold of 1.
+	 */
+	u = (r == 0) ? 1.0 : (double)r * (1.0/9007199254740992.0L);
+	prof_sample_threshold = (uint64_t)(log(u) /
+	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+	    + (uint64_t)1U;
+}
+
+/*
+ * Account for size bytes of allocation in the calling thread's sampling
+ * state (prof_sample_accum / prof_sample_threshold).
+ *
+ * prof_sample_accum counts bytes allocated since the last sample point; once
+ * accum + size reaches prof_sample_threshold, the excess is carried over and
+ * a new threshold is drawn.  Does nothing when opt_lg_prof_sample is 0.
+ */
+static inline void
+prof_sample_accum_update(size_t size)
+{
+
+ if (opt_lg_prof_sample == 0) {
+ /*
+ * Don't bother with sampling logic, since sampling interval is
+ * 1.
+ */
+ return;
+ }
+
+ /*
+ * The thread-local state has no explicit initializer, and
+ * prof_sample_threshold_update() always adds one to its result, so a zero
+ * threshold identifies the first call on this thread.
+ */
+ if (prof_sample_threshold == 0) {
+ /* Initialize. Seed the prng differently for each thread. */
+ prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
+ prof_sample_threshold_update();
+ }
+
+ /* Take care to avoid integer overflow. */
+ /*
+ * Equivalent to (accum + size >= threshold), written without forming the
+ * sum.
+ */
+ if (size >= prof_sample_threshold - prof_sample_accum) {
+ /*
+ * Carry over the excess: accum = accum + size - threshold.  When size
+ * exceeds the threshold the inner difference wraps, but the unsigned
+ * (modulo 2^64) result is still the intended value.
+ */
+ prof_sample_accum -= (prof_sample_threshold - size);
+ /*
+ * Compute new geometrically distributed prof_sample_threshold.
+ */
+ prof_sample_threshold_update();
+ /*
+ * The carried-over excess may itself span one or more of the newly
+ * drawn thresholds; consume those as well.
+ */
+ while (prof_sample_accum >= prof_sample_threshold) {
+ prof_sample_accum -= prof_sample_threshold;
+ prof_sample_threshold_update();
+ }
+ } else
+ prof_sample_accum += size;
+}
+
void
prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
{
size_t size = isalloc(ptr);
+ assert(ptr != NULL);
+
prof_cnt_set(ptr, cnt);
+ prof_sample_accum_update(size);
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs++;
- cnt->cnts.curbytes += size;
- cnt->cnts.accumobjs++;
- cnt->cnts.accumbytes += size;
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
+ if ((uintptr_t)cnt > (uintptr_t)1U) {
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ cnt->cnts.curobjs++;
+ cnt->cnts.curbytes += size;
+ cnt->cnts.accumobjs++;
+ cnt->cnts.accumbytes += size;
+ /*********/
+ mb_write();
+ /*********/
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ }
}
void
@@ -580,20 +658,23 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
{
size_t size = isalloc(ptr);
- prof_cnt_set(ptr, cnt);
+ if (ptr != NULL) {
+ prof_cnt_set(ptr, cnt);
+ prof_sample_accum_update(size);
+ }
- if (old_cnt != NULL)
+ if ((uintptr_t)old_cnt > (uintptr_t)1U)
old_cnt->epoch++;
- if (cnt != NULL)
+ if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write();
/*********/
- if (old_cnt != NULL) {
+ if ((uintptr_t)old_cnt > (uintptr_t)1U) {
old_cnt->cnts.curobjs--;
old_cnt->cnts.curbytes -= old_size;
}
- if (cnt != NULL) {
+ if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
cnt->cnts.curbytes += size;
cnt->cnts.accumobjs++;
@@ -602,9 +683,9 @@ prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
/*********/
mb_write();
/*********/
- if (old_cnt != NULL)
+ if ((uintptr_t)old_cnt > (uintptr_t)1U)
old_cnt->epoch++;
- if (cnt != NULL)
+ if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write(); /* Not strictly necessary. */
@@ -614,21 +695,24 @@ void
prof_free(const void *ptr)
{
prof_thr_cnt_t *cnt = prof_cnt_get(ptr);
- size_t size = isalloc(ptr);
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
- cnt->cnts.curobjs--;
- cnt->cnts.curbytes -= size;
- /*********/
- mb_write();
- /*********/
- cnt->epoch++;
- /*********/
- mb_write();
- /*********/
+ if ((uintptr_t)cnt > (uintptr_t)1) {
+ size_t size = isalloc(ptr);
+
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ cnt->cnts.curobjs--;
+ cnt->cnts.curbytes -= size;
+ /*********/
+ mb_write();
+ /*********/
+ cnt->epoch++;
+ /*********/
+ mb_write();
+ /*********/
+ }
}
static void
@@ -825,7 +909,13 @@ prof_dump(const char *filename, bool leakcheck)
prof_write(umax2s(cnt_all.accumobjs, 10, buf));
prof_write(": ");
prof_write(umax2s(cnt_all.accumbytes, 10, buf));
- prof_write("] @ heapprofile\n");
+ if (opt_lg_prof_sample == 0)
+ prof_write("] @ heapprofile\n");
+ else {
+ prof_write("] @ heap_v2/");
+ prof_write(umax2s((uint64_t)1U << opt_lg_prof_sample, 10, buf));
+ prof_write("\n");
+ }
/* Dump per ctx profile stats. */
for (tabind = 0; ckh_iter(&bt2ctx, &tabind, (void **)&bt, (void **)&ctx)
@@ -1104,6 +1194,14 @@ prof_boot0(void)
* initialized, so this function must be executed early.
*/
+ if (opt_lg_prof_sample > 0) {
+ /*
+ * Disable leak checking, since not all allocations will be
+ * sampled.
+ */
+ opt_prof_leak = false;
+ }
+
if (opt_prof_leak && opt_prof == false) {
/*
* Enable opt_prof, but in such a way that profiles are never
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index 7c6d8c9..f8c1731 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -540,13 +540,18 @@ stats_print(void (*write4)(void *, const char *, const char *, const char *,
tcache_nslots && ssv >= 0 ? umax2s(tcache_gc_sweep,
10, s) : "N/A", "\n", "");
}
- if ((err = JEMALLOC_P(mallctl)("opt.lg_prof_bt_max", &sv, &ssz,
- NULL, 0)) == 0) {
+ if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
+ == 0 && bv) {
+ xmallctl("opt.lg_prof_bt_max", &sv, &ssz, NULL, 0);
write4(w4opaque, "Maximum profile backtrace depth: ",
umax2s((1U << sv), 10, s), "\n", "");
- }
- if ((err = JEMALLOC_P(mallctl)("opt.lg_prof_interval", &sv,
- &ssz, NULL, 0)) == 0) {
+
+ xmallctl("opt.lg_prof_sample", &sv, &ssz, NULL, 0);
+ write4(w4opaque, "Average profile sample interval: ",
+ umax2s((1U << sv), 10, s), "", "");
+ write4(w4opaque, " (2^", umax2s(sv, 10, s), ")\n", "");
+
+ xmallctl("opt.lg_prof_interval", &sv, &ssz, NULL, 0);
write4(w4opaque, "Average profile dump interval: ",
umax2s((1U << sv), 10, s), "", "");
write4(w4opaque, " (2^", umax2s(sv, 10, s), ")\n", "");