-rw-r--r--  include/jemalloc/internal/arena_inlines_a.h       34
-rw-r--r--  include/jemalloc/internal/arena_structs_b.h        3
-rw-r--r--  include/jemalloc/internal/atomic_inlines.h         4
-rw-r--r--  include/jemalloc/internal/atomic_types.h           8
-rw-r--r--  include/jemalloc/internal/jemalloc_internal.h.in   7
-rw-r--r--  include/jemalloc/internal/private_symbols.txt      5
-rw-r--r--  include/jemalloc/internal/prof_externs.h           1
-rw-r--r--  include/jemalloc/internal/prof_inlines_a.h         76
-rw-r--r--  include/jemalloc/internal/prof_inlines_b.h (renamed from include/jemalloc/internal/prof_inlines.h)  6
-rw-r--r--  include/jemalloc/internal/prof_structs.h           7
-rw-r--r--  include/jemalloc/internal/prof_types.h             1
-rw-r--r--  include/jemalloc/internal/witness_types.h          1
-rw-r--r--  src/arena.c                                        18
-rw-r--r--  src/prof.c                                         14
-rw-r--r--  src/tcache.c                                        2
15 files changed, 128 insertions(+), 59 deletions(-)
diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h
index a81aaf5..ea7e099 100644
--- a/include/jemalloc/internal/arena_inlines_a.h
+++ b/include/jemalloc/internal/arena_inlines_a.h
@@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena);
void arena_internal_add(arena_t *arena, size_t size);
void arena_internal_sub(arena_t *arena, size_t size);
size_t arena_internal_get(arena_t *arena);
-bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
-bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
#endif /* JEMALLOC_ENABLE_INLINE */
@@ -34,29 +32,6 @@ arena_internal_get(arena_t *arena) {
}
JEMALLOC_INLINE bool
-arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) {
- cassert(config_prof);
- assert(prof_interval != 0);
-
- arena->prof_accumbytes += accumbytes;
- if (arena->prof_accumbytes >= prof_interval) {
- arena->prof_accumbytes %= prof_interval;
- return true;
- }
- return false;
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) {
- cassert(config_prof);
-
- if (likely(prof_interval == 0)) {
- return false;
- }
- return arena_prof_accum_impl(arena, accumbytes);
-}
-
-JEMALLOC_INLINE bool
arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
cassert(config_prof);
@@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
return false;
}
- {
- bool ret;
-
- malloc_mutex_lock(tsdn, &arena->lock);
- ret = arena_prof_accum_impl(arena, accumbytes);
- malloc_mutex_unlock(tsdn, &arena->lock);
- return ret;
- }
+ return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
}
#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */
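[Editor's note: the hunk above removes arena_prof_accum_impl() and arena_prof_accum_locked() and routes arena_prof_accum() through prof_accum_add(), which owns its own synchronization instead of relying on arena->lock. The interval logic itself is unchanged. As a minimal single-threaded sketch (illustrative names, not jemalloc API): bytes accumulate until they cross the dump interval, the remainder is kept, and the caller is told to trigger an idump.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for prof_interval; jemalloc derives the real value from options. */
static const uint64_t interval = 1024;

static bool
accum_sketch(uint64_t *accum, uint64_t bytes) {
	*accum += bytes;
	if (*accum >= interval) {
		*accum %= interval;	/* keep remainder; coalesces multiple crossings */
		return true;		/* caller should trigger a dump */
	}
	return false;
}

int main(void) {
	uint64_t accum = 0;
	printf("%d\n", accum_sketch(&accum, 1000));	/* 0: still below interval */
	printf("%d\n", accum_sketch(&accum, 100));	/* 1: crossed; accum is now 76 */
	return 0;
}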
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
index dde2689..2ee5690 100644
--- a/include/jemalloc/internal/arena_structs_b.h
+++ b/include/jemalloc/internal/arena_structs_b.h
@@ -138,7 +138,8 @@ struct arena_s {
*/
ql_head(tcache_t) tcache_ql;
- /* Synchronization: lock. */
+ /* Synchronization: internal. */
+ prof_accum_t prof_accum;
uint64_t prof_accumbytes;
/*
diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h
index 7c1902f..de66d57 100644
--- a/include/jemalloc/internal/atomic_inlines.h
+++ b/include/jemalloc/internal/atomic_inlines.h
@@ -23,7 +23,7 @@
*/
#ifndef JEMALLOC_ENABLE_INLINE
-# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# ifdef JEMALLOC_ATOMIC_U64
uint64_t atomic_add_u64(uint64_t *p, uint64_t x);
uint64_t atomic_sub_u64(uint64_t *p, uint64_t x);
bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s);
@@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x);
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+#ifdef JEMALLOC_ATOMIC_U64
# if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_u64(uint64_t *p, uint64_t x) {
diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h
new file mode 100644
index 0000000..0fd5e5b
--- /dev/null
+++ b/include/jemalloc/internal/atomic_types.h
@@ -0,0 +1,8 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# define JEMALLOC_ATOMIC_U64
+#endif
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */
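[Editor's note: for context on this new gate, LG_SIZEOF_PTR is log2(sizeof(void *)), so the condition holds exactly on platforms with 8-byte pointers (or 8-byte ints), where native 64-bit atomics are assumed usable. A self-contained sketch of the same pattern; HAVE_ATOMIC_U64 and the hard-coded LG_SIZEOF_PTR are illustrative stand-ins, not jemalloc names.]

#include <stdio.h>

/* Normally supplied by configure; hard-coded here for illustration. */
#define LG_SIZEOF_PTR 3

#if (LG_SIZEOF_PTR == 3)
# define HAVE_ATOMIC_U64	/* stand-in for JEMALLOC_ATOMIC_U64 */
#endif

int main(void) {
#ifdef HAVE_ATOMIC_U64
	printf("lock-free u64 accumulator path\n");
#else
	printf("mutex-protected accumulator path\n");
#endif
	return 0;
}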
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index bace9c4..7e9c24b 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -380,6 +380,7 @@ typedef unsigned szind_t;
#include "jemalloc/internal/nstime_types.h"
#include "jemalloc/internal/util_types.h"
+#include "jemalloc/internal/atomic_types.h"
#include "jemalloc/internal/spin_types.h"
#include "jemalloc/internal/prng_types.h"
#include "jemalloc/internal/ticker_types.h"
@@ -419,10 +420,10 @@ typedef unsigned szind_t;
#include "jemalloc/internal/extent_structs.h"
#include "jemalloc/internal/extent_dss_structs.h"
#include "jemalloc/internal/base_structs.h"
+#include "jemalloc/internal/prof_structs.h"
#include "jemalloc/internal/arena_structs_b.h"
#include "jemalloc/internal/rtree_structs.h"
#include "jemalloc/internal/tcache_structs.h"
-#include "jemalloc/internal/prof_structs.h"
#include "jemalloc/internal/tsd_structs.h"
@@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
* Include portions of arena code interleaved with tcache code in order to
* resolve circular dependencies.
*/
+#include "jemalloc/internal/prof_inlines_a.h"
#include "jemalloc/internal/arena_inlines_a.h"
#ifndef JEMALLOC_ENABLE_INLINE
@@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size,
}
#endif
-#include "jemalloc/internal/prof_inlines.h"
-
+#include "jemalloc/internal/prof_inlines_b.h"
#ifdef __cplusplus
}
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index ab5a672..4e79991 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -54,8 +54,6 @@ arena_prefork1
arena_prefork2
arena_prefork3
arena_prof_accum
-arena_prof_accum_impl
-arena_prof_accum_locked
arena_prof_promote
arena_prof_tctx_get
arena_prof_tctx_reset
@@ -364,6 +362,9 @@ prng_range_zu
prng_state_next_u32
prng_state_next_u64
prng_state_next_zu
+prof_accum_add
+prof_accum_cancel
+prof_accum_init
prof_active
prof_active_get
prof_active_get_unlocked
diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index 76505f8..f3b6f8d 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header;
void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes,
uint64_t *accumobjs, uint64_t *accumbytes);
#endif
+bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h
new file mode 100644
index 0000000..d77635a
--- /dev/null
+++ b/include/jemalloc/internal/prof_inlines_a.h
@@ -0,0 +1,76 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
+ uint64_t accumbytes);
+void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE bool
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+ cassert(config_prof);
+
+ bool overflow;
+ uint64_t a0, a1;
+
+ /*
+ * If the application allocates fast enough (and/or if idump is slow
+ * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+ * idump trigger coalescing. This is an intentional mechanism that
+ * avoids rate-limiting allocation.
+ */
+#ifdef JEMALLOC_ATOMIC_U64
+ do {
+ a0 = atomic_read_u64(&prof_accum->accumbytes);
+ a1 = a0 + accumbytes;
+ assert(a1 >= a0);
+ overflow = (a1 >= prof_interval);
+ if (overflow) {
+ a1 %= prof_interval;
+ }
+ } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+ malloc_mutex_lock(tsdn, &prof_accum->mtx);
+ a0 = prof_accum->accumbytes;
+ a1 = a0 + accumbytes;
+ overflow = (a1 >= prof_interval);
+ if (overflow) {
+ a1 %= prof_interval;
+ }
+ prof_accum->accumbytes = a1;
+ malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+ return overflow;
+}
+
+JEMALLOC_INLINE void
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+ cassert(config_prof);
+
+ /*
+ * Cancel out as much of the excessive prof_accumbytes increase as
+ * possible without underflowing. Interval-triggered dumps occur
+ * slightly more often than intended as a result of incomplete
+ * canceling.
+ */
+ uint64_t a0, a1;
+#ifdef JEMALLOC_ATOMIC_U64
+ do {
+ a0 = atomic_read_u64(&prof_accum->accumbytes);
+ a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
+ usize) : 0;
+ } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+ malloc_mutex_lock(tsdn, &prof_accum->mtx);
+ a0 = prof_accum->accumbytes;
+ a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
+ 0;
+ prof_accum->accumbytes = a1;
+ malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+}
+#endif
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
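[Editor's note: the CAS loop above retries while jemalloc's atomic_cas_u64() returns true, i.e. while the compare-and-swap fails. For readers more familiar with C11 atomics, here is a sketch of the same accumulate-and-wrap pattern using <stdatomic.h>, where compare-exchange instead returns true on success and reloads the expected value on failure. This is illustrative only, not jemalloc code.]

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool
accum_add_c11(_Atomic uint64_t *accumbytes, uint64_t add, uint64_t interval) {
	uint64_t a0 = atomic_load_explicit(accumbytes, memory_order_relaxed);
	uint64_t a1;
	bool overflow;
	do {
		a1 = a0 + add;
		assert(a1 >= a0);	/* no u64 wraparound expected */
		overflow = (a1 >= interval);
		if (overflow) {
			a1 %= interval;
		}
		/* On failure, a0 is reloaded with the current value and we retry. */
	} while (!atomic_compare_exchange_weak_explicit(accumbytes, &a0, a1,
	    memory_order_relaxed, memory_order_relaxed));
	return overflow;
}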
diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines_b.h
index aba2936..9e969a0 100644
--- a/include/jemalloc/internal/prof_inlines.h
+++ b/include/jemalloc/internal/prof_inlines_b.h
@@ -1,5 +1,5 @@
-#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H
-#define JEMALLOC_INTERNAL_PROF_INLINES_H
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
#ifndef JEMALLOC_ENABLE_INLINE
bool prof_active_get_unlocked(void);
@@ -237,4 +237,4 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) {
}
#endif
-#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */
diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h
index caae125..afff6aa 100644
--- a/include/jemalloc/internal/prof_structs.h
+++ b/include/jemalloc/internal/prof_structs.h
@@ -15,6 +15,13 @@ typedef struct {
} prof_unwind_data_t;
#endif
+struct prof_accum_s {
+#ifndef JEMALLOC_ATOMIC_U64
+ malloc_mutex_t mtx;
+#endif
+ uint64_t accumbytes;
+};
+
struct prof_cnt_s {
/* Profiling counters. */
uint64_t curobjs;
diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h
index ff0db65..1eff995 100644
--- a/include/jemalloc/internal/prof_types.h
+++ b/include/jemalloc/internal/prof_types.h
@@ -2,6 +2,7 @@
#define JEMALLOC_INTERNAL_PROF_TYPES_H
typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_accum_s prof_accum_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_gctx_s prof_gctx_t;
diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h
index 2929916..f919cc5 100644
--- a/include/jemalloc/internal/witness_types.h
+++ b/include/jemalloc/internal/witness_types.h
@@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF
#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF
diff --git a/src/arena.c b/src/arena.c
index 345c57d..40db9d1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr,
extent_usize_set(extent, usize);
- /*
- * Cancel out as much of the excessive prof_accumbytes increase as
- * possible without underflowing. Interval-triggered dumps occur
- * slightly more often than intended as a result of incomplete
- * canceling.
- */
- malloc_mutex_lock(tsdn, &arena->lock);
- if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) {
- arena->prof_accumbytes -= LARGE_MINCLASS - usize;
- } else {
- arena->prof_accumbytes = 0;
- }
- malloc_mutex_unlock(tsdn, &arena->lock);
+ prof_accum_cancel(tsdn, &arena->prof_accum, usize);
assert(isalloc(tsdn, extent, ptr) == usize);
}
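[Editor's note: the replaced block is the same clamped subtraction that prof_accum_cancel() now performs. A sampled small allocation was accounted as a full LARGE_MINCLASS extent, so the over-credit of (LARGE_MINCLASS - usize) bytes is backed out, saturating at zero. A worked numeric example follows; LARGE_MINCLASS is platform-dependent, and 16384 is an assumed value here.]

#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t large_minclass = 16384;	/* assumed, platform-dependent */
	uint64_t usize = 128;			/* the promoted small request */
	uint64_t excess = large_minclass - usize;	/* 16256 over-credited */

	uint64_t accum1 = 10000;	/* less than excess: clamps to 0 */
	uint64_t c1 = accum1 >= excess ? accum1 - excess : 0;
	uint64_t accum2 = 20000;	/* more than excess: subtracts fully */
	uint64_t c2 = accum2 >= excess ? accum2 - excess : 0;

	printf("%llu -> %llu\n", (unsigned long long)accum1,
	    (unsigned long long)c1);	/* 10000 -> 0 */
	printf("%llu -> %llu\n", (unsigned long long)accum2,
	    (unsigned long long)c2);	/* 20000 -> 3744 */
	return 0;
}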
@@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
}
if (config_prof) {
- arena->prof_accumbytes = 0;
+ if (prof_accum_init(tsdn, &arena->prof_accum)) {
+ goto label_error;
+ }
}
if (config_cache_oblivious) {
diff --git a/src/prof.c b/src/prof.c
index 5aeefb2..13fa20d 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -1753,6 +1753,20 @@ prof_fdump(void) {
prof_dump(tsd, false, filename, opt_prof_leak);
}
+bool
+prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
+ cassert(config_prof);
+
+#ifndef JEMALLOC_ATOMIC_U64
+ if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
+ WITNESS_RANK_PROF_ACCUM)) {
+ return true;
+ }
+#endif
+ prof_accum->accumbytes = 0;
+ return false;
+}
+
void
prof_idump(tsdn_t *tsdn) {
tsd_t *tsd;
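[Editor's note: prof_accum_init() only needs to create a mutex on platforms without 64-bit atomics; elsewhere it reduces to zeroing the counter, and a true return propagates as an error, which arena_new() above treats as a reason to abort arena construction. A sketch of the same two-path initialization using pthreads in place of jemalloc's malloc_mutex_t; the names are illustrative.]

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
#ifndef HAVE_ATOMIC_U64		/* stand-in for JEMALLOC_ATOMIC_U64 */
	pthread_mutex_t mtx;	/* only needed without native u64 atomics */
#endif
	uint64_t accumbytes;
} accum_sketch_t;

static bool
accum_init_sketch(accum_sketch_t *acc) {
#ifndef HAVE_ATOMIC_U64
	if (pthread_mutex_init(&acc->mtx, NULL) != 0) {
		return true;	/* error, mirroring prof_accum_init */
	}
#endif
	acc->accumbytes = 0;
	return false;
}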
diff --git a/src/tcache.c b/src/tcache.c
index 94c4570..f38c2d5 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
}
if ((config_prof || config_stats) && locked_arena == arena) {
if (config_prof) {
- idump = arena_prof_accum_locked(arena,
+ idump = arena_prof_accum(tsd_tsdn(tsd), arena,
tcache->prof_accumbytes);
tcache->prof_accumbytes = 0;
}