diff options
-rw-r--r-- | include/jemalloc/internal/arena_inlines_a.h | 34 | ||||
-rw-r--r-- | include/jemalloc/internal/arena_structs_b.h | 3 | ||||
-rw-r--r-- | include/jemalloc/internal/atomic_inlines.h | 4 | ||||
-rw-r--r-- | include/jemalloc/internal/atomic_types.h | 8 | ||||
-rw-r--r-- | include/jemalloc/internal/jemalloc_internal.h.in | 7 | ||||
-rw-r--r-- | include/jemalloc/internal/private_symbols.txt | 5 | ||||
-rw-r--r-- | include/jemalloc/internal/prof_externs.h | 1 | ||||
-rw-r--r-- | include/jemalloc/internal/prof_inlines_a.h | 76 | ||||
-rw-r--r-- | include/jemalloc/internal/prof_inlines_b.h (renamed from include/jemalloc/internal/prof_inlines.h) | 6 | ||||
-rw-r--r-- | include/jemalloc/internal/prof_structs.h | 7 | ||||
-rw-r--r-- | include/jemalloc/internal/prof_types.h | 1 | ||||
-rw-r--r-- | include/jemalloc/internal/witness_types.h | 1 | ||||
-rw-r--r-- | src/arena.c | 18 | ||||
-rw-r--r-- | src/prof.c | 14 | ||||
-rw-r--r-- | src/tcache.c | 2 |
15 files changed, 128 insertions, 59 deletions
diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index a81aaf5..ea7e099 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena); void arena_internal_add(arena_t *arena, size_t size); void arena_internal_sub(arena_t *arena, size_t size); size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); #endif /* JEMALLOC_ENABLE_INLINE */ @@ -34,29 +32,6 @@ arena_internal_get(arena_t *arena) { } JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes %= prof_interval; - return true; - } - return false; -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0)) { - return false; - } - return arena_prof_accum_impl(arena, accumbytes); -} - -JEMALLOC_INLINE bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { return false; } - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return ret; - } + return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); } #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index dde2689..2ee5690 100644 --- a/include/jemalloc/internal/arena_structs_b.h 
+++ b/include/jemalloc/internal/arena_structs_b.h @@ -138,7 +138,8 @@ struct arena_s { */ ql_head(tcache_t) tcache_ql; - /* Synchronization: lock. */ + /* Synchronization: internal. */ + prof_accum_t prof_accum; uint64_t prof_accumbytes; /* diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index 7c1902f..de66d57 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -23,7 +23,7 @@ */ #ifndef JEMALLOC_ENABLE_INLINE -# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef JEMALLOC_ATOMIC_U64 uint64_t atomic_add_u64(uint64_t *p, uint64_t x); uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); @@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#ifdef JEMALLOC_ATOMIC_U64 # if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h new file mode 100644 index 0000000..0fd5e5b --- /dev/null +++ b/include/jemalloc/internal/atomic_types.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H +#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H + +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# define JEMALLOC_ATOMIC_U64 +#endif + +#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bace9c4..7e9c24b 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -380,6 +380,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime_types.h" #include 
"jemalloc/internal/util_types.h" +#include "jemalloc/internal/atomic_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -419,10 +420,10 @@ typedef unsigned szind_t; #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" #include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/tsd_structs.h" @@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. */ +#include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" #ifndef JEMALLOC_ENABLE_INLINE @@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, } #endif -#include "jemalloc/internal/prof_inlines.h" - +#include "jemalloc/internal/prof_inlines_b.h" #ifdef __cplusplus } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ab5a672..4e79991 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -54,8 +54,6 @@ arena_prefork1 arena_prefork2 arena_prefork3 arena_prof_accum -arena_prof_accum_impl -arena_prof_accum_locked arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset @@ -364,6 +362,9 @@ prng_range_zu prng_state_next_u32 prng_state_next_u64 prng_state_next_zu +prof_accum_add +prof_accum_cancel +prof_accum_init prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 76505f8..f3b6f8d 100644 --- a/include/jemalloc/internal/prof_externs.h +++ 
b/include/jemalloc/internal/prof_externs.h @@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif +bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h new file mode 100644 index 0000000..d77635a --- /dev/null +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -0,0 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H +#define JEMALLOC_INTERNAL_PROF_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, + uint64_t accumbytes); +void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE bool +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. 
+ */ +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif + return overflow; +} + +JEMALLOC_INLINE void +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - + usize) : 0; + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = (a0 >= LARGE_MINCLASS - usize) ? 
a0 - (LARGE_MINCLASS - usize) : + 0; + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif +} +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines_b.h index aba2936..9e969a0 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H -#define JEMALLOC_INTERNAL_PROF_INLINES_H +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H #ifndef JEMALLOC_ENABLE_INLINE bool prof_active_get_unlocked(void); @@ -237,4 +237,4 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { } #endif -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index caae125..afff6aa 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -15,6 +15,13 @@ typedef struct { } prof_unwind_data_t; #endif +struct prof_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + uint64_t accumbytes; +}; + struct prof_cnt_s { /* Profiling counters. 
*/ uint64_t curobjs; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ff0db65..1eff995 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_TYPES_H typedef struct prof_bt_s prof_bt_t; +typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_gctx_s prof_gctx_t; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 2929916..f919cc5 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 345c57d..40db9d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, extent_usize_set(extent, usize); - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. 
- */ - malloc_mutex_lock(tsdn, &arena->lock); - if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) { - arena->prof_accumbytes -= LARGE_MINCLASS - usize; - } else { - arena->prof_accumbytes = 0; - } - malloc_mutex_unlock(tsdn, &arena->lock); + prof_accum_cancel(tsdn, &arena->prof_accum, usize); assert(isalloc(tsdn, extent, ptr) == usize); } @@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - arena->prof_accumbytes = 0; + if (prof_accum_init(tsdn, &arena->prof_accum)) { + goto label_error; + } } if (config_cache_oblivious) { diff --git a/src/prof.c b/src/prof.c --- a/src/prof.c +++ b/src/prof.c @@ -1753,6 +1753,20 @@ prof_fdump(void) { prof_dump(tsd, false, filename, opt_prof_leak); } +bool +prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { + cassert(config_prof); + +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + WITNESS_RANK_PROF_ACCUM)) { + return true; + } +#endif + prof_accum->accumbytes = 0; + return false; +} + void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; diff --git a/src/tcache.c b/src/tcache.c index 94c4570..f38c2d5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { - idump = arena_prof_accum_locked(arena, + idump = arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } |