From fa2d64c94b07ee21a0f6f44b9fe6e3bbefa51c6c Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Sun, 12 Feb 2017 17:03:46 -0800
Subject: Convert arena->prof_accumbytes synchronization to atomics.

---
 include/jemalloc/internal/arena_inlines_a.h      |  34 +---
 include/jemalloc/internal/arena_structs_b.h      |   3 +-
 include/jemalloc/internal/atomic_inlines.h       |   4 +-
 include/jemalloc/internal/atomic_types.h         |   8 +
 include/jemalloc/internal/jemalloc_internal.h.in |   7 +-
 include/jemalloc/internal/private_symbols.txt    |   5 +-
 include/jemalloc/internal/prof_externs.h         |   1 +
 include/jemalloc/internal/prof_inlines.h         | 240 -----------------------
 include/jemalloc/internal/prof_inlines_a.h       |  76 +++++++
 include/jemalloc/internal/prof_inlines_b.h       | 240 +++++++++++++++++++++++
 include/jemalloc/internal/prof_structs.h         |   7 +
 include/jemalloc/internal/prof_types.h           |   1 +
 include/jemalloc/internal/witness_types.h        |   1 +
 src/arena.c                                      |  18 +-
 src/prof.c                                       |  14 ++
 src/tcache.c                                     |   2 +-
 16 files changed, 365 insertions(+), 296 deletions(-)
 create mode 100644 include/jemalloc/internal/atomic_types.h
 delete mode 100644 include/jemalloc/internal/prof_inlines.h
 create mode 100644 include/jemalloc/internal/prof_inlines_a.h
 create mode 100644 include/jemalloc/internal/prof_inlines_b.h

diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h
index a81aaf5..ea7e099 100644
--- a/include/jemalloc/internal/arena_inlines_a.h
+++ b/include/jemalloc/internal/arena_inlines_a.h
@@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena);
 void arena_internal_add(arena_t *arena, size_t size);
 void arena_internal_sub(arena_t *arena, size_t size);
 size_t arena_internal_get(arena_t *arena);
-bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes);
-bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes);
 bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes);
 #endif /* JEMALLOC_ENABLE_INLINE */
 
@@ -34,29 +32,6 @@ arena_internal_get(arena_t *arena) {
 }
 
 JEMALLOC_INLINE bool
-arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) {
-	cassert(config_prof);
-	assert(prof_interval != 0);
-
-	arena->prof_accumbytes += accumbytes;
-	if (arena->prof_accumbytes >= prof_interval) {
-		arena->prof_accumbytes %= prof_interval;
-		return true;
-	}
-	return false;
-}
-
-JEMALLOC_INLINE bool
-arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) {
-	cassert(config_prof);
-
-	if (likely(prof_interval == 0)) {
-		return false;
-	}
-	return arena_prof_accum_impl(arena, accumbytes);
-}
-
-JEMALLOC_INLINE bool
 arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
 	cassert(config_prof);
 
@@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
 		return false;
 	}
 
-	{
-		bool ret;
-
-		malloc_mutex_lock(tsdn, &arena->lock);
-		ret = arena_prof_accum_impl(arena, accumbytes);
-		malloc_mutex_unlock(tsdn, &arena->lock);
-		return ret;
-	}
+	return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
 }
 
 #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
index dde2689..2ee5690 100644
--- a/include/jemalloc/internal/arena_structs_b.h
+++ b/include/jemalloc/internal/arena_structs_b.h
@@ -138,7 +138,8 @@ struct arena_s {
 	 */
 	ql_head(tcache_t)	tcache_ql;
 
-	/* Synchronization: lock. */
+	/* Synchronization: internal. */
+	prof_accum_t		prof_accum;
 	uint64_t		prof_accumbytes;
 
 	/*
diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h
index 7c1902f..de66d57 100644
--- a/include/jemalloc/internal/atomic_inlines.h
+++ b/include/jemalloc/internal/atomic_inlines.h
@@ -23,7 +23,7 @@
  */
 
 #ifndef JEMALLOC_ENABLE_INLINE
-# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# ifdef JEMALLOC_ATOMIC_U64
 uint64_t atomic_add_u64(uint64_t *p, uint64_t x);
 uint64_t atomic_sub_u64(uint64_t *p, uint64_t x);
 bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s);
@@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x);
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
 /******************************************************************************/
 /* 64-bit operations. */
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+#ifdef JEMALLOC_ATOMIC_U64
 # if (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE uint64_t
 atomic_add_u64(uint64_t *p, uint64_t x) {
diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h
new file mode 100644
index 0000000..0fd5e5b
--- /dev/null
+++ b/include/jemalloc/internal/atomic_types.h
@@ -0,0 +1,8 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H
+
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# define JEMALLOC_ATOMIC_U64
+#endif
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index bace9c4..7e9c24b 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -380,6 +380,7 @@ typedef unsigned szind_t;
 
 #include "jemalloc/internal/nstime_types.h"
 #include "jemalloc/internal/util_types.h"
+#include "jemalloc/internal/atomic_types.h"
 #include "jemalloc/internal/spin_types.h"
 #include "jemalloc/internal/prng_types.h"
 #include "jemalloc/internal/ticker_types.h"
@@ -419,10 +420,10 @@ typedef unsigned szind_t;
 #include "jemalloc/internal/extent_structs.h"
 #include "jemalloc/internal/extent_dss_structs.h"
 #include "jemalloc/internal/base_structs.h"
+#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/arena_structs_b.h"
 #include "jemalloc/internal/rtree_structs.h"
 #include "jemalloc/internal/tcache_structs.h"
-#include "jemalloc/internal/prof_structs.h"
 #include "jemalloc/internal/tsd_structs.h"
 
 
@@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
  * Include portions of arena code interleaved with tcache code in order to
  * resolve circular dependencies.
  */
+#include "jemalloc/internal/prof_inlines_a.h"
 #include "jemalloc/internal/arena_inlines_a.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
@@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size,
 }
 #endif
 
-#include "jemalloc/internal/prof_inlines.h"
-
+#include "jemalloc/internal/prof_inlines_b.h"
 
 #ifdef __cplusplus
 }
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index ab5a672..4e79991 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -54,8 +54,6 @@ arena_prefork1
 arena_prefork2
 arena_prefork3
 arena_prof_accum
-arena_prof_accum_impl
-arena_prof_accum_locked
 arena_prof_promote
 arena_prof_tctx_get
 arena_prof_tctx_reset
@@ -364,6 +362,9 @@ prng_range_zu
 prng_state_next_u32
 prng_state_next_u64
 prng_state_next_zu
+prof_accum_add
+prof_accum_cancel
+prof_accum_init
 prof_active
 prof_active_get
 prof_active_get_unlocked
diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index 76505f8..f3b6f8d 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header;
 void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
     uint64_t *accumbytes);
 #endif
+bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
 void prof_idump(tsdn_t *tsdn);
 bool prof_mdump(tsd_t *tsd, const char *filename);
 void prof_gdump(tsdn_t *tsdn);
diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h
deleted file mode 100644
index aba2936..0000000
--- a/include/jemalloc/internal/prof_inlines.h
+++ /dev/null
@@ -1,240 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H
-#define JEMALLOC_INTERNAL_PROF_INLINES_H
-
-#ifndef JEMALLOC_ENABLE_INLINE
-bool prof_active_get_unlocked(void);
-bool prof_gdump_get_unlocked(void);
-prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
-prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent,
-    const void *ptr);
-void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr,
-    size_t usize, prof_tctx_t *tctx);
-void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr,
-    prof_tctx_t *tctx);
-bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
-    prof_tdata_t **tdata_out);
-prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
-    bool update);
-void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr,
-    size_t usize, prof_tctx_t *tctx);
-void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr,
-    size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated,
-    extent_t *old_extent, const void *old_ptr, size_t old_usize,
-    prof_tctx_t *old_tctx);
-void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr,
-    size_t usize);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
-JEMALLOC_ALWAYS_INLINE bool
-prof_active_get_unlocked(void) {
-	/*
-	 * Even if opt_prof is true, sampling can be temporarily disabled by
-	 * setting prof_active to false. No locking is used when reading
-	 * prof_active in the fast path, so there are no guarantees regarding
-	 * how long it will take for all threads to notice state changes.
-	 */
-	return prof_active;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_gdump_get_unlocked(void) {
-	/*
-	 * No locking is used when reading prof_gdump_val in the fast path, so
-	 * there are no guarantees regarding how long it will take for all
-	 * threads to notice state changes.
-	 */
-	return prof_gdump_val;
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tdata_t *
-prof_tdata_get(tsd_t *tsd, bool create) {
-	prof_tdata_t *tdata;
-
-	cassert(config_prof);
-
-	tdata = tsd_prof_tdata_get(tsd);
-	if (create) {
-		if (unlikely(tdata == NULL)) {
-			if (tsd_nominal(tsd)) {
-				tdata = prof_tdata_init(tsd);
-				tsd_prof_tdata_set(tsd, tdata);
-			}
-		} else if (unlikely(tdata->expired)) {
-			tdata = prof_tdata_reinit(tsd, tdata);
-			tsd_prof_tdata_set(tsd, tdata);
-		}
-		assert(tdata == NULL || tdata->attached);
-	}
-
-	return tdata;
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	return arena_prof_tctx_get(tsdn, extent, ptr);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize,
-    prof_tctx_t *tctx) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr,
-    prof_tctx_t *tctx) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-
-	arena_prof_tctx_reset(tsdn, extent, ptr, tctx);
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
-    prof_tdata_t **tdata_out) {
-	prof_tdata_t *tdata;
-
-	cassert(config_prof);
-
-	tdata = prof_tdata_get(tsd, true);
-	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
-		tdata = NULL;
-	}
-
-	if (tdata_out != NULL) {
-		*tdata_out = tdata;
-	}
-
-	if (unlikely(tdata == NULL)) {
-		return true;
-	}
-
-	if (likely(tdata->bytes_until_sample >= usize)) {
-		if (update) {
-			tdata->bytes_until_sample -= usize;
-		}
-		return true;
-	} else {
-		/* Compute new sample threshold. */
-		if (update) {
-			prof_sample_threshold_update(tdata);
-		}
-		return !tdata->active;
-	}
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
-	prof_tctx_t *ret;
-	prof_tdata_t *tdata;
-	prof_bt_t bt;
-
-	assert(usize == s2u(usize));
-
-	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
-	    &tdata))) {
-		ret = (prof_tctx_t *)(uintptr_t)1U;
-	} else {
-		bt_init(&bt, tdata->vec);
-		prof_backtrace(&bt);
-		ret = prof_lookup(tsd, &bt);
-	}
-
-	return ret;
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize,
-    prof_tctx_t *tctx) {
-	cassert(config_prof);
-	assert(ptr != NULL);
-	assert(usize == isalloc(tsdn, extent, ptr));
-
-	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
-		prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx);
-	} else {
-		prof_tctx_set(tsdn, extent, ptr, usize,
-		    (prof_tctx_t *)(uintptr_t)1U);
-	}
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize,
-    prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent,
-    const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) {
-	bool sampled, old_sampled, moved;
-
-	cassert(config_prof);
-	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
-
-	if (prof_active && !updated && ptr != NULL) {
-		assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr));
-		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
-			/*
-			 * Don't sample. The usize passed to prof_alloc_prep()
-			 * was larger than what actually got allocated, so a
-			 * backtrace was captured for this allocation, even
-			 * though its actual usize was insufficient to cross the
-			 * sample threshold.
-			 */
-			prof_alloc_rollback(tsd, tctx, true);
-			tctx = (prof_tctx_t *)(uintptr_t)1U;
-		}
-	}
-
-	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
-	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
-	moved = (ptr != old_ptr);
-
-	if (unlikely(sampled)) {
-		prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize,
-		    tctx);
-	} else if (moved) {
-		prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize,
-		    (prof_tctx_t *)(uintptr_t)1U);
-	} else if (unlikely(old_sampled)) {
-		/*
-		 * prof_tctx_set() would work for the !moved case as well, but
-		 * prof_tctx_reset() is slightly cheaper, and the proper thing
-		 * to do here in the presence of explicit knowledge re: moved
-		 * state.
-		 */
-		prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx);
-	} else {
-		assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), extent, ptr) ==
-		    (uintptr_t)1U);
-	}
-
-	/*
-	 * The prof_free_sampled_object() call must come after the
-	 * prof_malloc_sample_object() call, because tctx and old_tctx may be
-	 * the same, in which case reversing the call order could cause the tctx
-	 * to be prematurely destroyed as a side effect of momentarily zeroed
-	 * counters.
-	 */
-	if (unlikely(old_sampled)) {
-		prof_free_sampled_object(tsd, old_usize, old_tctx);
-	}
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) {
-	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr);
-
-	cassert(config_prof);
-	assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr));
-
-	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
-		prof_free_sampled_object(tsd, usize, tctx);
-	}
-}
-#endif
-
-#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */
diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h
new file mode 100644
index 0000000..d77635a
--- /dev/null
+++ b/include/jemalloc/internal/prof_inlines_a.h
@@ -0,0 +1,76 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
+    uint64_t accumbytes);
+void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE bool
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+	cassert(config_prof);
+
+	bool overflow;
+	uint64_t a0, a1;
+
+	/*
+	 * If the application allocates fast enough (and/or if idump is slow
+	 * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+	 * idump trigger coalescing. This is an intentional mechanism that
+	 * avoids rate-limiting allocation.
+	 */
+#ifdef JEMALLOC_ATOMIC_U64
+	do {
+		a0 = atomic_read_u64(&prof_accum->accumbytes);
+		a1 = a0 + accumbytes;
+		assert(a1 >= a0);
+		overflow = (a1 >= prof_interval);
+		if (overflow) {
+			a1 %= prof_interval;
+		}
+	} while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = a0 + accumbytes;
+	overflow = (a1 >= prof_interval);
+	if (overflow) {
+		a1 %= prof_interval;
+	}
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+	return overflow;
+}
+
+JEMALLOC_INLINE void
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+	cassert(config_prof);
+
+	/*
+	 * Cancel out as much of the excessive prof_accumbytes increase as
+	 * possible without underflowing. Interval-triggered dumps occur
+	 * slightly more often than intended as a result of incomplete
+	 * canceling.
+	 */
+	uint64_t a0, a1;
+#ifdef JEMALLOC_ATOMIC_U64
+	do {
+		a0 = atomic_read_u64(&prof_accum->accumbytes);
+		a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
+		    usize) : 0;
+	} while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1));
+#else
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
+	    0;
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif
+}
+#endif
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h
new file mode 100644
index 0000000..9e969a0
--- /dev/null
+++ b/include/jemalloc/internal/prof_inlines_b.h
@@ -0,0 +1,240 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool prof_active_get_unlocked(void);
+bool prof_gdump_get_unlocked(void);
+prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create);
+prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent,
+    const void *ptr);
+void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr,
+    size_t usize, prof_tctx_t *tctx);
+void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr,
+    prof_tctx_t *tctx);
+bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+    prof_tdata_t **tdata_out);
+prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active,
+    bool update);
+void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr,
+    size_t usize, prof_tctx_t *tctx);
+void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr,
+    size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated,
+    extent_t *old_extent, const void *old_ptr, size_t old_usize,
+    prof_tctx_t *old_tctx);
+void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr,
+    size_t usize);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void) {
+	/*
+	 * Even if opt_prof is true, sampling can be temporarily disabled by
+	 * setting prof_active to false. No locking is used when reading
+	 * prof_active in the fast path, so there are no guarantees regarding
+	 * how long it will take for all threads to notice state changes.
+	 */
+	return prof_active;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void) {
+	/*
+	 * No locking is used when reading prof_gdump_val in the fast path, so
+	 * there are no guarantees regarding how long it will take for all
+	 * threads to notice state changes.
+	 */
+	return prof_gdump_val;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create) {
+	prof_tdata_t *tdata;
+
+	cassert(config_prof);
+
+	tdata = tsd_prof_tdata_get(tsd);
+	if (create) {
+		if (unlikely(tdata == NULL)) {
+			if (tsd_nominal(tsd)) {
+				tdata = prof_tdata_init(tsd);
+				tsd_prof_tdata_set(tsd, tdata);
+			}
+		} else if (unlikely(tdata->expired)) {
+			tdata = prof_tdata_reinit(tsd, tdata);
+			tsd_prof_tdata_set(tsd, tdata);
+		}
+		assert(tdata == NULL || tdata->attached);
+	}
+
+	return tdata;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	return arena_prof_tctx_get(tsdn, extent, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize,
+    prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr,
+    prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	arena_prof_tctx_reset(tsdn, extent, ptr, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+    prof_tdata_t **tdata_out) {
+	prof_tdata_t *tdata;
+
+	cassert(config_prof);
+
+	tdata = prof_tdata_get(tsd, true);
+	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
+		tdata = NULL;
+	}
+
+	if (tdata_out != NULL) {
+		*tdata_out = tdata;
+	}
+
+	if (unlikely(tdata == NULL)) {
+		return true;
+	}
+
+	if (likely(tdata->bytes_until_sample >= usize)) {
+		if (update) {
+			tdata->bytes_until_sample -= usize;
+		}
+		return true;
+	} else {
+		/* Compute new sample threshold. */
+		if (update) {
+			prof_sample_threshold_update(tdata);
+		}
+		return !tdata->active;
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
+	prof_tctx_t *ret;
+	prof_tdata_t *tdata;
+	prof_bt_t bt;
+
+	assert(usize == s2u(usize));
+
+	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+	    &tdata))) {
+		ret = (prof_tctx_t *)(uintptr_t)1U;
+	} else {
+		bt_init(&bt, tdata->vec);
+		prof_backtrace(&bt);
+		ret = prof_lookup(tsd, &bt);
+	}
+
+	return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize,
+    prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+	assert(usize == isalloc(tsdn, extent, ptr));
+
+	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+		prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx);
+	} else {
+		prof_tctx_set(tsdn, extent, ptr, usize,
+		    (prof_tctx_t *)(uintptr_t)1U);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize,
+    prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent,
+    const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) {
+	bool sampled, old_sampled, moved;
+
+	cassert(config_prof);
+	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
+
+	if (prof_active && !updated && ptr != NULL) {
+		assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr));
+		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
+			/*
+			 * Don't sample. The usize passed to prof_alloc_prep()
+			 * was larger than what actually got allocated, so a
+			 * backtrace was captured for this allocation, even
+			 * though its actual usize was insufficient to cross the
+			 * sample threshold.
+			 */
+			prof_alloc_rollback(tsd, tctx, true);
+			tctx = (prof_tctx_t *)(uintptr_t)1U;
+		}
+	}
+
+	sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
+	moved = (ptr != old_ptr);
+
+	if (unlikely(sampled)) {
+		prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize,
+		    tctx);
+	} else if (moved) {
+		prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize,
+		    (prof_tctx_t *)(uintptr_t)1U);
+	} else if (unlikely(old_sampled)) {
+		/*
+		 * prof_tctx_set() would work for the !moved case as well, but
+		 * prof_tctx_reset() is slightly cheaper, and the proper thing
+		 * to do here in the presence of explicit knowledge re: moved
+		 * state.
+		 */
+		prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx);
+	} else {
+		assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), extent, ptr) ==
+		    (uintptr_t)1U);
+	}
+
+	/*
+	 * The prof_free_sampled_object() call must come after the
+	 * prof_malloc_sample_object() call, because tctx and old_tctx may be
+	 * the same, in which case reversing the call order could cause the tctx
+	 * to be prematurely destroyed as a side effect of momentarily zeroed
+	 * counters.
+	 */
+	if (unlikely(old_sampled)) {
+		prof_free_sampled_object(tsd, old_usize, old_tctx);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) {
+	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr);
+
+	cassert(config_prof);
+	assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr));
+
+	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+		prof_free_sampled_object(tsd, usize, tctx);
+	}
+}
+#endif
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */
diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h
index caae125..afff6aa 100644
--- a/include/jemalloc/internal/prof_structs.h
+++ b/include/jemalloc/internal/prof_structs.h
@@ -15,6 +15,13 @@ typedef struct {
 } prof_unwind_data_t;
 #endif
 
+struct prof_accum_s {
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_t	mtx;
+#endif
+	uint64_t	accumbytes;
+};
+
 struct prof_cnt_s {
 	/* Profiling counters. */
 	uint64_t	curobjs;
diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h
index ff0db65..1eff995 100644
--- a/include/jemalloc/internal/prof_types.h
+++ b/include/jemalloc/internal/prof_types.h
@@ -2,6 +2,7 @@
 #define JEMALLOC_INTERNAL_PROF_TYPES_H
 
 typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_accum_s prof_accum_t;
 typedef struct prof_cnt_s prof_cnt_t;
 typedef struct prof_tctx_s prof_tctx_t;
 typedef struct prof_gctx_s prof_gctx_t;
diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h
index 2929916..f919cc5 100644
--- a/include/jemalloc/internal/witness_types.h
+++ b/include/jemalloc/internal/witness_types.h
@@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
 #define WITNESS_RANK_ARENA_LARGE	WITNESS_RANK_LEAF
 #define WITNESS_RANK_DSS		WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_ACTIVE	WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACCUM		WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_DUMP_SEQ	WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_GDUMP	WITNESS_RANK_LEAF
 #define WITNESS_RANK_PROF_NEXT_THR_UID	WITNESS_RANK_LEAF
diff --git a/src/arena.c b/src/arena.c
index 345c57d..40db9d1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr,
 
 	extent_usize_set(extent, usize);
 
-	/*
-	 * Cancel out as much of the excessive prof_accumbytes increase as
-	 * possible without underflowing.  Interval-triggered dumps occur
-	 * slightly more often than intended as a result of incomplete
-	 * canceling.
-	 */
-	malloc_mutex_lock(tsdn, &arena->lock);
-	if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) {
-		arena->prof_accumbytes -= LARGE_MINCLASS - usize;
-	} else {
-		arena->prof_accumbytes = 0;
-	}
-	malloc_mutex_unlock(tsdn, &arena->lock);
+	prof_accum_cancel(tsdn, &arena->prof_accum, usize);
 
 	assert(isalloc(tsdn, extent, ptr) == usize);
 }
@@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 	}
 
 	if (config_prof) {
-		arena->prof_accumbytes = 0;
+		if (prof_accum_init(tsdn, &arena->prof_accum)) {
+			goto label_error;
+		}
 	}
 
 	if (config_cache_oblivious) {
diff --git a/src/prof.c b/src/prof.c
index 5aeefb2..13fa20d 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -1753,6 +1753,20 @@ prof_fdump(void) {
 	prof_dump(tsd, false, filename, opt_prof_leak);
 }
 
+bool
+prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
+	cassert(config_prof);
+
+#ifndef JEMALLOC_ATOMIC_U64
+	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
+	    WITNESS_RANK_PROF_ACCUM)) {
+		return true;
+	}
+#endif
+	prof_accum->accumbytes = 0;
+	return false;
+}
+
 void
 prof_idump(tsdn_t *tsdn) {
 	tsd_t *tsd;
diff --git a/src/tcache.c b/src/tcache.c
index 94c4570..f38c2d5 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
 		}
 		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
-				idump = arena_prof_accum_locked(arena,
+				idump = arena_prof_accum(tsd_tsdn(tsd), arena,
 				    tcache->prof_accumbytes);
 				tcache->prof_accumbytes = 0;
 			}
--
cgit v0.12
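
For readers skimming the patch, here is a minimal standalone sketch of the lock-free accumulator pattern that prof_accum_add() above implements. It is illustrative only and not part of the change: it uses C11 <stdatomic.h> rather than jemalloc's atomic_*_u64 wrappers, and PROF_INTERVAL is an arbitrary placeholder value, not jemalloc's configured profiling interval.

/* Illustrative sketch only; names and the interval value are made up. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PROF_INTERVAL ((uint64_t)1 << 20)	/* placeholder interval */

static _Atomic uint64_t accumbytes;

/*
 * Add n bytes to the shared accumulator; return true when the running total
 * crosses PROF_INTERVAL, wrapping the counter modulo the interval.
 */
static bool
accum_add(uint64_t n) {
	uint64_t a0, a1;
	bool overflow;

	a0 = atomic_load_explicit(&accumbytes, memory_order_relaxed);
	do {
		a1 = a0 + n;
		overflow = (a1 >= PROF_INTERVAL);
		if (overflow) {
			a1 %= PROF_INTERVAL;
		}
		/* On CAS failure, a0 is refreshed and the loop recomputes a1. */
	} while (!atomic_compare_exchange_weak_explicit(&accumbytes, &a0, a1,
	    memory_order_relaxed, memory_order_relaxed));
	return overflow;
}

int
main(void) {
	unsigned triggers = 0;

	for (int i = 0; i < 10000; i++) {
		if (accum_add(4096)) {
			triggers++;
		}
	}
	printf("interval triggers: %u\n", triggers);
	return 0;
}

The design point mirrored from the patch is that the overflow test and the modulo wrap happen inside the compare-and-swap retry loop, so exactly one racing thread observes each interval crossing, without taking the arena lock on the allocation fast path.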