| author | Qi Wang <interwq@gwu.edu> | 2021-10-19 00:33:15 (GMT) |
| --- | --- | --- |
| committer | Qi Wang <interwq@gmail.com> | 2021-12-29 22:44:43 (GMT) |
| commit | b75822bc6e5cbbf463c611d8dea32857f8de9d3e (patch) | |
| tree | 3bb49cc57aea382bac754bab6eecbf8c0acdce53 /include/jemalloc | |
| parent | 06aac61c4b261e5d1c8dcf3c7dd7921e9e395d62 (diff) | |
| download | jemalloc-b75822bc6e5cbbf463c611d8dea32857f8de9d3e.zip, jemalloc-b75822bc6e5cbbf463c611d8dea32857f8de9d3e.tar.gz, jemalloc-b75822bc6e5cbbf463c611d8dea32857f8de9d3e.tar.bz2 | |
Implement use-after-free detection using junk and stash.
On deallocation, sampled pointers (chosen by their special alignment) are
junk-filled and stashed into the tcache (to prevent immediate reuse). The
expected behavior is that a read-after-free is corrupted, and stopped, by
the junk filling, while a write-after-free is detected when the stashed
pointers are flushed and the junk pattern is verified.
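To make the mechanism concrete, here is a minimal, self-contained sketch of the scheme described above -- not jemalloc's code. The flat `stash`/`STASH_CAP` arrays, `SAMPLE_ALIGN_MASK`, and the function names are illustrative stand-ins; in the actual change the stash lives at the low end of the existing tcache bin array, the mask is derived from `opt_lg_san_uaf_align`, and the junk byte is `0x5b` (`uaf_detect_junk`, see the diff below).

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative constants; jemalloc computes the mask at san_init() time. */
#define SAMPLE_ALIGN_MASK (((uintptr_t)1 << 12) - 1)
#define JUNK_BYTE 0x5b
#define STASH_CAP 8

static void *stash[STASH_CAP];
static size_t stash_usize[STASH_CAP];
static size_t nstashed;

/*
 * Deallocation path: junk-fill sampled (specially aligned) pointers and
 * stash them so the memory is not immediately reused.  Returns true if the
 * pointer was stashed; false means the caller should free it normally.
 */
static bool
dalloc_sampled(void *ptr, size_t usize) {
	if (((uintptr_t)ptr & SAMPLE_ALIGN_MASK) != 0 || nstashed == STASH_CAP) {
		return false;
	}
	memset(ptr, JUNK_BYTE, usize); /* A read-after-free now sees junk. */
	stash_usize[nstashed] = usize;
	stash[nstashed++] = ptr;
	return true;
}

/*
 * Flush path: any byte that no longer holds the junk pattern must have been
 * written after free.
 */
static void
flush_stashed(void (*dalloc_real)(void *)) {
	for (size_t i = 0; i < nstashed; i++) {
		const unsigned char *p = stash[i];
		for (size_t j = 0; j < stash_usize[i]; j++) {
			assert(p[j] == JUNK_BYTE && "write-after-free detected");
		}
		dalloc_real(stash[i]);
	}
	nstashed = 0;
}
```

The design point, spelled out in the new `cache_bin_nonfast_aligned()` comment below, is that sampling by alignment keeps the dealloc-side check to a single mask-and-compare, which some configurations already perform on the free fast path.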
Diffstat (limited to 'include/jemalloc')
| mode | file | lines changed |
| --- | --- | --- |
| -rw-r--r-- | include/jemalloc/internal/cache_bin.h | 101 |
| -rw-r--r-- | include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 |
| -rw-r--r-- | include/jemalloc/internal/jemalloc_internal_externs.h | 3 |
| -rw-r--r-- | include/jemalloc/internal/jemalloc_preamble.h.in | 8 |
| -rw-r--r-- | include/jemalloc/internal/san.h | 68 |
| -rw-r--r-- | include/jemalloc/internal/tcache_externs.h | 24 |
| -rw-r--r-- | include/jemalloc/internal/tcache_inlines.h | 20 |
7 files changed, 211 insertions, 16 deletions
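Most of the change lands in `cache_bin.h`, which extends the bin's down-growing stack so stashed pointers occupy the low end of the same backing array: `low_bits_empty` stays immutable, `stack_head` moves down as pointers are cached, and `low_bits_full` now creeps up as pointers are stashed. A toy model of that layout follows (plain pointers instead of jemalloc's 16-bit low-address arithmetic; all names are local to the sketch), before the diff itself:

```c
#include <stdio.h>

int
main(void) {
	enum { NCACHED_MAX = 8 };
	void *slots[NCACHED_MAX];
	void **empty = slots + NCACHED_MAX; /* like low_bits_empty: immutable */
	void **full = slots;                /* like low_bits_full: up on stash */
	void **head = empty;                /* like stack_head: down on cache */

	/* Two ordinary frees: push down from the empty end. */
	*--head = (void *)0x1000;
	*--head = (void *)0x2000;

	/* One sampled free: stash at the full end. */
	*full++ = (void *)0x3000;

	printf("ncached = %td, nstashed = %td, free slots = %td\n",
	    empty - head, full - slots, head - full);
	return 0;
}
```

Running it prints `ncached = 2, nstashed = 1, free slots = 5`: cached and stashed items consume capacity from opposite ends of one array, which is why `cache_bin_dalloc_easy()` and `cache_bin_stash()` in the diff can share the single `cache_bin_full()` test.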
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index 41942e9..266897f 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -98,7 +98,7 @@ struct cache_bin_s {
	 * when the array is nonempty -- this is in the array).
	 *
	 * Recall that since the stack grows down, this is the lowest address in
-	 * the array.
+	 * the array.  Only adjusted when stashing items.
	 */
	uint16_t low_bits_full;
@@ -107,7 +107,7 @@ struct cache_bin_s {
	 * is empty.
	 *
	 * The stack grows down -- this is one past the highest address in the
-	 * array.
+	 * array.  Immutable after initialization.
	 */
	uint16_t low_bits_empty;
 };
@@ -136,6 +136,26 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
	descriptor->bins = bins;
 }

+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_nonfast_aligned(const void *ptr) {
+	if (!config_uaf_detection) {
+		return false;
+	}
+	/*
+	 * Currently we use alignment to decide which pointer to junk & stash on
+	 * dealloc (for catching use-after-free).  In some common cases a
+	 * page-aligned check is needed already (sdalloc w/ config_prof), so we
+	 * are getting it more or less for free -- no added instructions on
+	 * free_fastpath.
+	 *
+	 * Another way of deciding which pointer to sample is adding another
+	 * thread_event to pick one every N bytes.  That also adds no cost on
+	 * the fastpath, however it will tend to pick large allocations which is
+	 * not the desired behavior.
+	 */
+	return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0;
+}
+
 /* Returns ncached_max: Upper limit on ncached. */
 static inline cache_bin_sz_t
 cache_bin_info_ncached_max(cache_bin_info_t *info) {
@@ -233,6 +253,20 @@ cache_bin_empty_position_get(cache_bin_t *bin) {
 }

 /*
+ * Internal.
+ *
+ * A pointer to the position with the lowest address of the backing array.
+ */
+static inline void **
+cache_bin_full_position_get(cache_bin_t *bin, cache_bin_info_t *info) {
+	cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+	void **ret = cache_bin_empty_position_get(bin) - ncached_max;
+	assert(ret <= bin->stack_head);
+
+	return ret;
+}
+
+/*
  * As the name implies.  This is important since it's not correct to try to
  * batch fill a nonempty cache bin.
  */
@@ -359,13 +393,17 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) {
	return n;
 }

+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_full(cache_bin_t *bin) {
+	return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full);
+}
+
 /*
  * Free an object into the given bin.  Fails only if the bin is full.
  */
 JEMALLOC_ALWAYS_INLINE bool
 cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
-	uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
-	if (unlikely(low_bits == bin->low_bits_full)) {
+	if (unlikely(cache_bin_full(bin))) {
		return false;
	}

@@ -377,7 +415,39 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
	return true;
 }

-/**
+/* Returns false if failed to stash (i.e. bin is full). */
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_stash(cache_bin_t *bin, void *ptr) {
+	if (cache_bin_full(bin)) {
+		return false;
+	}
+
+	/* Stash at the full position, in the [full, head) range. */
+	uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head;
+	/* Wraparound handled as well. */
+	uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head);
+	*(void **)((uintptr_t)bin->stack_head - diff) = ptr;
+
+	assert(!cache_bin_full(bin));
+	bin->low_bits_full += sizeof(void *);
+	cache_bin_assert_earlier(bin, bin->low_bits_full, low_bits_head);
+
+	return true;
+}
+
+JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
+cache_bin_nstashed_get(cache_bin_t *bin, cache_bin_info_t *info) {
+	cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+	void **full = cache_bin_full_position_get(bin, info);
+
+	uint16_t nstashed = cache_bin_diff(bin, (uint16_t)(uintptr_t)full,
+	    bin->low_bits_full) / sizeof(void *);
+	assert(nstashed <= ncached_max);
+
+	return nstashed;
+}
+
+/*
  * Filling and flushing are done in batch, on arrays of void *s.  For filling,
  * the arrays go forward, and can be accessed with ordinary array arithmetic.
  * For flushing, we work from the end backwards, and so need to use special
@@ -463,6 +533,27 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
	cache_bin_low_water_adjust(bin);
 }

+static inline void
+cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind,
+    cache_bin_info_t *info, cache_bin_ptr_array_t *arr,
+    cache_bin_sz_t nstashed) {
+	assert(nstashed > 0);
+	assert(cache_bin_nstashed_get(bin, info) == nstashed);
+
+	void **full = cache_bin_full_position_get(bin, info);
+	arr->ptr = full;
+	assert(*arr->ptr != NULL);
+}
+
+static inline void
+cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) {
+	void **full = cache_bin_full_position_get(bin, info);
+
+	/* Reset the bin local full position. */
+	bin->low_bits_full = (uint16_t)(uintptr_t)full;
+	assert(cache_bin_nstashed_get(bin, info) == 0);
+}
+
 /*
  * Initialize a cache_bin_info to represent up to the given number of items in
  * the cache_bins it is associated with.
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index a4be549..0cb15d3 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -415,6 +415,9 @@
 /* Performs additional size checks when defined. */
 #undef JEMALLOC_OPT_SIZE_CHECKS

+/* Allows sampled junk and stash for checking use-after-free when defined. */
+#undef JEMALLOC_UAF_DETECTION
+
 /* Darwin VM_MAKE_TAG support */
 #undef JEMALLOC_HAVE_VM_MAKE_TAG

diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index e8bfb03..fa1fabe 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -35,6 +35,9 @@ extern const char *zero_realloc_mode_names[];
 extern atomic_zu_t zero_realloc_count;
 extern bool opt_cache_oblivious;

+/* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */
+extern uintptr_t san_cache_bin_nonfast_mask;
+
 /* Number of CPUs. */
 extern unsigned ncpus;

diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index f5d83a6..5ce77d9 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -198,6 +198,14 @@ static const bool config_opt_size_checks =
 #endif
     ;

+static const bool config_uaf_detection =
+#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG)
+    true
+#else
+    false
+#endif
+    ;
+
 /* Whether or not the C++ extensions are enabled. */
 static const bool config_enable_cxx =
 #ifdef JEMALLOC_ENABLE_CXX
diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h
index 70debf3..f97211a 100644
--- a/include/jemalloc/internal/san.h
+++ b/include/jemalloc/internal/san.h
@@ -10,9 +10,16 @@
 #define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0
 #define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0

+#define SAN_LG_UAF_ALIGN_DEFAULT (-1)
+#define SAN_CACHE_BIN_NONFAST_MASK_DEFAULT (uintptr_t)(-1)
+
+static const uintptr_t uaf_detect_junk = (uintptr_t)0x5b5b5b5b5b5b5b5bULL;
+
 /* 0 means disabled, i.e. never guarded. */
 extern size_t opt_san_guard_large;
 extern size_t opt_san_guard_small;
+/* -1 means disabled, i.e. never check for use-after-free. */
+extern ssize_t opt_lg_san_uaf_align;

 void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
     emap_t *emap, bool left, bool right, bool remap);
@@ -24,7 +31,10 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
  */
 void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks,
     edata_t *edata, emap_t *emap);
+void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize);
+
 void tsd_san_init(tsd_t *tsd);
+void san_init(ssize_t lg_san_uaf_align);

 static inline void
 san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
@@ -121,4 +131,62 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) {
	}
 }

+static inline void
+san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid,
+    void **last) {
+	size_t ptr_sz = sizeof(void *);
+
+	*first = ptr;
+
+	*mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1)));
+	assert(*first != *mid || usize == ptr_sz);
+	assert((uintptr_t)*first <= (uintptr_t)*mid);
+
+	/*
+	 * When usize > 32K, the gap between requested_size and usize might be
+	 * greater than 4K -- this means the last write may access a
+	 * likely-untouched page (default settings w/ 4K pages).  However by
+	 * default the tcache only goes up to the 32K size class, and is usually
+	 * tuned lower instead of higher, which makes it less of a concern.
+	 */
+	*last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk));
+	assert(*first != *last || usize == ptr_sz);
+	assert(*mid != *last || usize <= ptr_sz * 2);
+	assert((uintptr_t)*mid <= (uintptr_t)*last);
+}
+
+static inline bool
+san_junk_ptr_should_slow(void) {
+	/*
+	 * The latter condition (pointer size greater than the min size class)
+	 * is not expected -- fall back to the slow path for simplicity.
+	 */
+	return config_debug || (LG_SIZEOF_PTR > SC_LG_TINY_MIN);
+}
+
+static inline void
+san_junk_ptr(void *ptr, size_t usize) {
+	if (san_junk_ptr_should_slow()) {
+		memset(ptr, (char)uaf_detect_junk, usize);
+		return;
+	}
+
+	void *first, *mid, *last;
+	san_junk_ptr_locations(ptr, usize, &first, &mid, &last);
+	*(uintptr_t *)first = uaf_detect_junk;
+	*(uintptr_t *)mid = uaf_detect_junk;
+	*(uintptr_t *)last = uaf_detect_junk;
+}
+
+static inline bool
+san_uaf_detection_enabled(void) {
+	bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1);
+	if (config_uaf_detection && ret) {
+		assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 <<
+		    opt_lg_san_uaf_align) - 1);
+	}
+
+	return ret;
+}
+
 #endif /* JEMALLOC_INTERNAL_GUARD_H */
diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h
index 95f3a68..a2ab710 100644
--- a/include/jemalloc/internal/tcache_externs.h
+++ b/include/jemalloc/internal/tcache_externs.h
@@ -34,23 +34,25 @@ extern cache_bin_info_t *tcache_bin_info;
  */
 extern tcaches_t *tcaches;

-size_t	tcache_salloc(tsdn_t *tsdn, const void *ptr);
-void	*tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
+void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
     cache_bin_t *tbin, szind_t binind, bool *tcache_success);
-void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
     szind_t binind, unsigned rem);
-void	tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
+void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
     szind_t binind, unsigned rem);
-void	tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
+void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin,
+    szind_t binind, bool is_small);
+void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
     tcache_t *tcache, arena_t *arena);
 tcache_t *tcache_create_explicit(tsd_t *tsd);
-void	tcache_cleanup(tsd_t *tsd);
-void	tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
-bool	tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
-void	tcaches_flush(tsd_t *tsd, unsigned ind);
-void	tcaches_destroy(tsd_t *tsd, unsigned ind);
-bool	tcache_boot(tsdn_t *tsdn, base_t *base);
+void tcache_cleanup(tsd_t *tsd);
+void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind);
+void tcaches_flush(tsd_t *tsd, unsigned ind);
+void tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool tcache_boot(tsdn_t *tsdn, base_t *base);
 void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
     tcache_t *tcache, arena_t *arena);
 void tcache_prefork(tsdn_t *tsdn);
diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h
index 926c852..2634f14 100644
--- a/include/jemalloc/internal/tcache_inlines.h
+++ b/include/jemalloc/internal/tcache_inlines.h
@@ -3,6 +3,7 @@

 #include "jemalloc/internal/bin.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/san.h"
 #include "jemalloc/internal/sc.h"
 #include "jemalloc/internal/sz.h"
 #include "jemalloc/internal/util.h"
@@ -61,6 +62,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
			return arena_malloc_hard(tsd_tsdn(tsd), arena, size,
			    binind, zero);
		}
+		tcache_bin_flush_stashed(tsd, tcache, bin, binind,
+		    /* is_small */ true);

		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
		    bin, binind, &tcache_hard_success);
@@ -100,6 +103,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
		if (unlikely(arena == NULL)) {
			return NULL;
		}
+		tcache_bin_flush_stashed(tsd, tcache, bin, binind,
+		    /* is_small */ false);

		ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero);
		if (ret == NULL) {
@@ -126,6 +131,21 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS);

	cache_bin_t *bin = &tcache->bins[binind];
+	/*
+	 * Not marking the branch unlikely because this is past free_fastpath()
+	 * (which handles the most common cases), i.e. at this point it's often
+	 * uncommon cases.
+	 */
+	if (cache_bin_nonfast_aligned(ptr)) {
+		/* Junk unconditionally, even if bin is full. */
+		san_junk_ptr(ptr, sz_index2size(binind));
+		if (cache_bin_stash(bin, ptr)) {
+			return;
+		}
+		assert(cache_bin_full(bin));
+		/* Bin full; fall through into the flush branch. */
+	}
+
	if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) {
		if (unlikely(tcache_small_bin_disabled(binind, bin))) {
			arena_dalloc_small(tsd_tsdn(tsd), ptr);
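For reference, outside debug builds `san_junk_ptr()` avoids a full `memset` and writes the junk pattern to just three pointer-sized words. Below is the placement arithmetic of `san_junk_ptr_locations()` from the diff above, restated as a standalone program for illustration (no jemalloc types; the pointers are never dereferenced):

```c
#include <stdint.h>
#include <stdio.h>

/* Same placement math as san_junk_ptr_locations() in san.h above. */
static void
junk_locations(uintptr_t ptr, size_t usize, uintptr_t *first, uintptr_t *mid,
    uintptr_t *last) {
	size_t ptr_sz = sizeof(void *);

	*first = ptr;
	/* Word-aligned midpoint of the allocation. */
	*mid = ptr + ((usize >> 1) & ~(ptr_sz - 1));
	/* Final pointer-sized word of the usable region. */
	*last = ptr + usize - ptr_sz;
}

int
main(void) {
	const uintptr_t base = 0x1000;
	const size_t sizes[] = {8, 16, 32, 4096};

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		uintptr_t first, mid, last;
		junk_locations(base, sizes[i], &first, &mid, &last);
		printf("usize %4zu: junk at offsets %zu / %zu / %zu\n",
		    sizes[i], (size_t)(first - base), (size_t)(mid - base),
		    (size_t)(last - base));
	}
	return 0;
}
```

With 8-byte pointers this prints offsets `0 / 0 / 0` for the minimal size class (all three writes coincide, matching the asserts in the diff) up through `0 / 2048 / 4088` for a 4 KiB object, i.e. the junk always covers the start, middle, and end of the region at constant cost.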