summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQi Wang <interwq@gwu.edu>2017-03-04 03:58:43 (GMT)
committerQi Wang <interwq@gmail.com>2017-03-23 07:03:28 (GMT)
commita4f176af57de77d62b4751af876512748c6ce800 (patch)
tree3d4f07bf954fa2105b4df7fe9824963eb45a9c34
parent6309df628fa4f11dce084dc53c77ea852408d347 (diff)
downloadjemalloc-a4f176af57de77d62b4751af876512748c6ce800.zip
jemalloc-a4f176af57de77d62b4751af876512748c6ce800.tar.gz
jemalloc-a4f176af57de77d62b4751af876512748c6ce800.tar.bz2
Output bin lock profiling results to malloc_stats.
Two counters are included for the small bins: lock contention rate, and max lock waiting time.
-rw-r--r--include/jemalloc/internal/mutex_inlines.h35
-rw-r--r--include/jemalloc/internal/nstime_externs.h1
-rw-r--r--include/jemalloc/internal/private_symbols.txt2
-rw-r--r--include/jemalloc/internal/stats_structs.h2
-rw-r--r--src/arena.c1
-rw-r--r--src/ctl.c8
-rw-r--r--src/nstime.c6
-rw-r--r--src/stats.c99
8 files changed, 120 insertions, 34 deletions
diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h
index cf0ce23..8e81fcd 100644
--- a/include/jemalloc/internal/mutex_inlines.h
+++ b/include/jemalloc/internal/mutex_inlines.h
@@ -9,6 +9,9 @@ bool malloc_mutex_trylock(malloc_mutex_t *mutex);
void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex);
+void malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data,
+ malloc_mutex_t *mutex);
+void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
@@ -23,6 +26,24 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) {
return MALLOC_MUTEX_TRYLOCK(mutex);
}
+/* Aggregate lock prof data. */
+JEMALLOC_INLINE void
+malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) {
+ nstime_add(&sum->tot_wait_time, &data->tot_wait_time);
+ if (nstime_compare(&data->max_wait_time, &sum->max_wait_time)) {
+ nstime_copy(&sum->max_wait_time, &data->max_wait_time);
+ }
+ sum->n_wait_times += data->n_wait_times;
+ sum->n_spin_acquired += data->n_spin_acquired;
+
+ if (sum->max_n_thds < data->max_n_thds) {
+ sum->max_n_thds = data->max_n_thds;
+ }
+ sum->n_waiting_thds += data->n_waiting_thds;
+ sum->n_owner_switches += data->n_owner_switches;
+ sum->n_lock_ops += data->n_lock_ops;
+}
+
JEMALLOC_INLINE void
malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn, &mutex->witness);
@@ -58,6 +79,20 @@ JEMALLOC_INLINE void
malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn, &mutex->witness);
}
+
+/* Copy the prof data from mutex for processing. */
+JEMALLOC_INLINE void
+malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data,
+ malloc_mutex_t *mutex) {
+ lock_prof_data_t *source = &mutex->prof_data;
+ /* Can only read with the lock. */
+ malloc_mutex_assert_owner(tsdn, mutex);
+
+ *data = *source;
+ /* n_wait_thds is not reported (modified w/o locking). */
+ data->n_waiting_thds = 0;
+}
+
#endif
#endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */
diff --git a/include/jemalloc/internal/nstime_externs.h b/include/jemalloc/internal/nstime_externs.h
index cf14ae0..1abc84d 100644
--- a/include/jemalloc/internal/nstime_externs.h
+++ b/include/jemalloc/internal/nstime_externs.h
@@ -5,6 +5,7 @@ void nstime_init(nstime_t *time, uint64_t ns);
void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
uint64_t nstime_ns(const nstime_t *time);
uint64_t nstime_sec(const nstime_t *time);
+uint64_t nstime_msec(const nstime_t *time);
uint64_t nstime_nsec(const nstime_t *time);
void nstime_copy(nstime_t *time, const nstime_t *source);
int nstime_compare(const nstime_t *a, const nstime_t *b);
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index e138de0..14ecded 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -273,6 +273,7 @@ malloc_mutex_assert_owner
malloc_mutex_boot
malloc_mutex_init
malloc_mutex_lock
+malloc_mutex_lock_slow
malloc_mutex_postfork_child
malloc_mutex_postfork_parent
malloc_mutex_prefork
@@ -302,6 +303,7 @@ nstime_imultiply
nstime_init
nstime_init2
nstime_monotonic
+nstime_msec
nstime_ns
nstime_nsec
nstime_sec
diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h
index ffcb3c1..cbc448a 100644
--- a/include/jemalloc/internal/stats_structs.h
+++ b/include/jemalloc/internal/stats_structs.h
@@ -56,6 +56,8 @@ struct malloc_bin_stats_s {
/* Current number of slabs in this bin. */
size_t curslabs;
+
+ lock_prof_data_t lock_data;
};
struct malloc_large_stats_s {
diff --git a/src/arena.c b/src/arena.c
index ef7ec37..832a125 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -299,6 +299,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
arena_bin_t *bin = &arena->bins[i];
malloc_mutex_lock(tsdn, &bin->lock);
+ malloc_lock_prof_read(tsdn, &bstats[i].lock_data, &bin->lock);
bstats[i].nmalloc += bin->stats.nmalloc;
bstats[i].ndalloc += bin->stats.ndalloc;
bstats[i].nrequests += bin->stats.nrequests;
diff --git a/src/ctl.c b/src/ctl.c
index ee69be6..d9f8be6 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -145,6 +145,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes)
CTL_PROTO(stats_arenas_i_bins_j_nslabs)
CTL_PROTO(stats_arenas_i_bins_j_nreslabs)
CTL_PROTO(stats_arenas_i_bins_j_curslabs)
+CTL_PROTO(stats_arenas_i_bins_j_lock_data)
INDEX_PROTO(stats_arenas_i_bins_j)
CTL_PROTO(stats_arenas_i_lextents_j_nmalloc)
CTL_PROTO(stats_arenas_i_lextents_j_ndalloc)
@@ -357,7 +358,8 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
{NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)},
{NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)},
{NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)},
- {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}
+ {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)},
+ {NAME("lock_data"), CTL(stats_arenas_i_bins_j_lock_data)}
};
static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
{NAME(""), CHILD(named, stats_arenas_i_bins_j)}
@@ -726,6 +728,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena,
} else {
assert(astats->bstats[i].curslabs == 0);
}
+ malloc_lock_prof_merge(&sdstats->bstats[i].lock_data,
+ &astats->bstats[i].lock_data);
}
for (i = 0; i < NSIZES - NBINS; i++) {
@@ -2333,6 +2337,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs,
arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t)
CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs,
arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_lock_data,
+ arenas_i(mib[2])->astats->bstats[mib[4]].lock_data, lock_prof_data_t)
static const ctl_named_node_t *
stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen,
diff --git a/src/nstime.c b/src/nstime.c
index a3f6c1d..70b2f9d 100644
--- a/src/nstime.c
+++ b/src/nstime.c
@@ -1,6 +1,7 @@
#include "jemalloc/internal/jemalloc_internal.h"
#define BILLION UINT64_C(1000000000)
+#define MILLION UINT64_C(1000000)
void
nstime_init(nstime_t *time, uint64_t ns) {
@@ -18,6 +19,11 @@ nstime_ns(const nstime_t *time) {
}
uint64_t
+nstime_msec(const nstime_t *time) {
+ return time->ns / MILLION;
+}
+
+uint64_t
nstime_sec(const nstime_t *time) {
return time->ns / BILLION;
}
diff --git a/src/stats.c b/src/stats.c
index 58b9a04..58283f8 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -32,6 +32,31 @@ bool opt_stats_print = false;
/******************************************************************************/
+/* Calculate x.yyy and output a string (takes a fixed sized char array). */
+static bool
+get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) {
+ if (divisor == 0 || dividend > divisor) {
+ /* The rate is not supposed to be greater than 1. */
+ return true;
+ }
+ if (dividend > 0) {
+ assert(UINT64_MAX / dividend >= 1000);
+ }
+
+ unsigned n = (unsigned)((dividend * 1000) / divisor);
+ if (n < 10) {
+ malloc_snprintf(str, 6, "0.00%u", n);
+ } else if (n < 100) {
+ malloc_snprintf(str, 6, "0.0%u", n);
+ } else if (n < 1000) {
+ malloc_snprintf(str, 6, "0.%u", n);
+ } else {
+ malloc_snprintf(str, 6, "1");
+ }
+
+ return false;
+}
+
static void
stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
bool json, bool large, unsigned i) {
@@ -51,13 +76,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
"bins: size ind allocated nmalloc"
" ndalloc nrequests curregs"
" curslabs regs pgs util nfills"
- " nflushes newslabs reslabs\n");
+ " nflushes newslabs reslabs"
+ " contention max_wait\n");
} else {
malloc_cprintf(write_cb, cbopaque,
"bins: size ind allocated nmalloc"
" ndalloc nrequests curregs"
" curslabs regs pgs util newslabs"
- " reslabs\n");
+ " reslabs contention max_wait\n");
}
}
for (j = 0, in_gap = false; j < nbins; j++) {
@@ -100,6 +126,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
uint64_t);
CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs,
size_t);
+ lock_prof_data_t lock_data;
+ CTL_M2_M4_GET("stats.arenas.0.bins.0.lock_data", i, j,
+ &lock_data, lock_prof_data_t);
if (json) {
malloc_cprintf(write_cb, cbopaque,
@@ -127,55 +156,59 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
curslabs,
(j + 1 < nbins) ? "," : "");
} else if (!in_gap) {
- size_t availregs, milli;
- char util[6]; /* "x.yyy". */
-
- availregs = nregs * curslabs;
- milli = (availregs != 0) ? (1000 * curregs) / availregs
- : 1000;
-
- if (milli > 1000) {
- /*
- * Race detected: the counters were read in
- * separate mallctl calls and concurrent
- * operations happened in between. In this case
- * no meaningful utilization can be computed.
- */
- malloc_snprintf(util, sizeof(util), " race");
- } else if (milli < 10) {
- malloc_snprintf(util, sizeof(util),
- "0.00%zu", milli);
- } else if (milli < 100) {
- malloc_snprintf(util, sizeof(util), "0.0%zu",
- milli);
- } else if (milli < 1000) {
- malloc_snprintf(util, sizeof(util), "0.%zu",
- milli);
- } else {
- assert(milli == 1000);
- malloc_snprintf(util, sizeof(util), "1");
+ size_t availregs = nregs * curslabs;
+ char util[6];
+ if (get_rate_str((uint64_t)curregs, (uint64_t)availregs,
+ util)) {
+ if (availregs == 0) {
+ malloc_snprintf(util, sizeof(util),
+ "1");
+ } else if (curregs > availregs) {
+ /*
+ * Race detected: the counters were read
+ * in separate mallctl calls and
+ * concurrent operations happened in
+ * between. In this case no meaningful
+ * utilization can be computed.
+ */
+ malloc_snprintf(util, sizeof(util),
+ " race");
+ } else {
+ not_reached();
+ }
+ }
+ char rate[6];
+ if (get_rate_str(lock_data.n_wait_times,
+ lock_data.n_lock_ops, rate)) {
+ if (lock_data.n_lock_ops == 0) {
+ malloc_snprintf(rate, sizeof(rate),
+ "0");
+ }
}
+ uint64_t max_wait = nstime_msec(
+ &lock_data.max_wait_time);
if (config_tcache) {
malloc_cprintf(write_cb, cbopaque,
"%20zu %3u %12zu %12"FMTu64
" %12"FMTu64" %12"FMTu64" %12zu"
" %12zu %4u %3zu %-5s %12"FMTu64
- " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n",
+ " %12"FMTu64" %12"FMTu64" %12"FMTu64
+ " %12s %12"FMTu64"\n",
reg_size, j, curregs * reg_size, nmalloc,
ndalloc, nrequests, curregs, curslabs,
nregs, slab_size / page, util, nfills,
- nflushes, nslabs, nreslabs);
+ nflushes, nslabs, nreslabs, rate, max_wait);
} else {
malloc_cprintf(write_cb, cbopaque,
"%20zu %3u %12zu %12"FMTu64
" %12"FMTu64" %12"FMTu64" %12zu"
" %12zu %4u %3zu %-5s %12"FMTu64
- " %12"FMTu64"\n",
+ " %12"FMTu64" %12s\n",
reg_size, j, curregs * reg_size, nmalloc,
ndalloc, nrequests, curregs, curslabs,
nregs, slab_size / page, util, nslabs,
- nreslabs);
+ nreslabs, rate);
}
}
}