path: root/include
author     Jason Evans <jasone@canonware.com>    2015-05-04 16:58:36 (GMT)
committer  Jason Evans <jasone@canonware.com>    2015-05-06 20:27:39 (GMT)
commit     8a03cf039cd06f9fa6972711195055d865673966 (patch)
tree       8e5f25a7877b3352dfa47ed34cdef9d9f34da130 /include
parent     6bb54cb9da9cb0f996c16c6ea3e1dda0755390f5 (diff)
Implement cache index randomization for large allocations.
Extract szad size quantization into {extent,run}_quantize(), and quantize szad run sizes to the union of valid small region run sizes and large run sizes.

Refactor iteration in arena_run_first_fit() to use run_quantize{,_first,_next}(), and add support for padded large runs.

For large allocations that have no specified alignment constraints, compute a pseudo-random offset from the beginning of the first backing page that is a multiple of the cache line size. Under typical configurations with 4-KiB pages and 64-byte cache lines this results in a uniform distribution among 64 page boundary offsets.

Add the --disable-cache-oblivious option, primarily intended for performance testing.

This resolves #13.
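As a rough illustration of the randomization described above (the arena.c changes are outside this include/-limited diff, so the function name, PRNG constants, and page/cache-line sizes below are assumptions rather than the actual implementation): the new per-arena offset_state feeds prng64(), and the result is scaled to a cache-line multiple within the extra padding page.

#include <stdint.h>

#define LG_PAGE       12    /* Assumed: 4 KiB pages. */
#define LG_CACHELINE  6     /* Assumed: 64-byte cache lines. */

/* Hypothetical stand-in for the per-arena offset_state added below. */
static uint64_t offset_state = 0x9e3779b97f4a7c15ULL;

static uint64_t
random_cacheline_offset(void)
{
        uint64_t r;

        /*
         * Hand expansion of prng64(r, LG_PAGE - LG_CACHELINE, offset_state,
         * a, c) from prng.h; a and c here are placeholder LCG constants, not
         * necessarily those jemalloc uses.
         */
        r = (offset_state * 6364136223846793005ULL) + 1442695040888963407ULL;
        offset_state = r;
        r >>= (64 - (LG_PAGE - LG_CACHELINE));  /* 6 bits: 0..63 */

        return (r << LG_CACHELINE);             /* 0, 64, ..., 4032 bytes */
}

With 4-KiB pages and 64-byte cache lines this yields one of 64 equally likely page-boundary offsets, matching the uniform distribution described above; the extra page of large_pad introduced in arena.h is what makes room for the shifted base pointer.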
Diffstat (limited to 'include')
-rw-r--r--  include/jemalloc/internal/arena.h                       53
-rw-r--r--  include/jemalloc/internal/jemalloc_internal.h.in         7
-rw-r--r--  include/jemalloc/internal/jemalloc_internal_defs.h.in    6
-rw-r--r--  include/jemalloc/internal/prng.h                        12
4 files changed, 56 insertions, 22 deletions
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index dff99fb..fba1b81 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -290,6 +290,12 @@ struct arena_s {
uint64_t prof_accumbytes;
+ /*
+ * PRNG state for cache index randomization of large allocation base
+ * pointers.
+ */
+ uint64_t offset_state;
+
dss_prec_t dss_prec;
/*
@@ -394,7 +400,15 @@ struct arena_s {
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
-extern ssize_t opt_lg_dirty_mult;
+static const size_t large_pad =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ PAGE
+#else
+ 0
+#endif
+ ;
+
+extern ssize_t opt_lg_dirty_mult;
extern arena_bin_info_t arena_bin_info[NBINS];
@@ -475,7 +489,7 @@ void arena_stats_merge(arena_t *arena, const char **dss,
arena_stats_t *astats, malloc_bin_stats_t *bstats,
malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
arena_t *arena_new(unsigned ind);
-void arena_boot(void);
+bool arena_boot(void);
void arena_prefork(arena_t *arena);
void arena_postfork_parent(arena_t *arena);
void arena_postfork_child(arena_t *arena);
@@ -721,7 +735,7 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
{
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
- assert((size & PAGE_MASK) == 0);
+ assert(size == PAGE_CEILING(size));
assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags);
@@ -734,7 +748,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind);
size_t mapbits = arena_mapbitsp_read(mapbitsp);
- assert((size & PAGE_MASK) == 0);
+ assert(size == PAGE_CEILING(size));
assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
arena_mapbitsp_write(mapbitsp, size | (mapbits & PAGE_MASK));
}
@@ -747,7 +761,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
size_t mapbits = arena_mapbitsp_read(mapbitsp);
size_t unzeroed;
- assert((size & PAGE_MASK) == 0);
+ assert(size == PAGE_CEILING(size));
assert((flags & CHUNK_MAP_DIRTY) == flags);
unzeroed = mapbits & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
arena_mapbitsp_write(mapbitsp, size | CHUNK_MAP_BININD_INVALID | flags
@@ -762,7 +776,8 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
size_t mapbits = arena_mapbitsp_read(mapbitsp);
assert(binind <= BININD_INVALID);
- assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS);
+ assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS +
+ large_pad);
arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) |
(binind << CHUNK_MAP_BININD_SHIFT));
}
@@ -1107,13 +1122,16 @@ arena_salloc(const void *ptr, bool demote)
* end up looking at binind to determine that ptr is a
* small allocation.
*/
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
- ret = arena_mapbits_large_size_get(chunk, pageind);
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
+ ret = arena_mapbits_large_size_get(chunk, pageind) -
+ large_pad;
assert(ret != 0);
- assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
+ assert(pageind + ((ret+large_pad)>>LG_PAGE) <=
+ chunk_npages);
assert(arena_mapbits_dirty_get(chunk, pageind) ==
arena_mapbits_dirty_get(chunk,
- pageind+(ret>>LG_PAGE)-1));
+ pageind+((ret+large_pad)>>LG_PAGE)-1));
} else {
/*
* Small allocation (possibly promoted to a large
@@ -1157,11 +1175,13 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache)
size_t size = arena_mapbits_large_size_get(chunk,
pageind);
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
- if (likely(tcache != NULL) && size <= tcache_maxclass)
- tcache_dalloc_large(tsd, tcache, ptr, size);
- else {
+ if (likely(tcache != NULL) && size <= tcache_maxclass) {
+ tcache_dalloc_large(tsd, tcache, ptr, size -
+ large_pad);
+ } else {
arena_dalloc_large(extent_node_arena_get(
&chunk->node), chunk, ptr);
}
@@ -1188,7 +1208,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
*/
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
size = arena_mapbits_large_size_get(chunk,
- pageind);
+ pageind) - large_pad;
}
}
assert(s2u(size) == s2u(arena_salloc(ptr, false)));
@@ -1205,7 +1225,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache)
&chunk->node), chunk, ptr, pageind);
}
} else {
- assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ assert(config_cache_oblivious || ((uintptr_t)ptr &
+ PAGE_MASK) == 0);
if (likely(tcache != NULL) && size <= tcache_maxclass)
tcache_dalloc_large(tsd, tcache, ptr, size);
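Restating the size accounting the arena.h hunks above introduce, assuming a cache-oblivious build with 4-KiB pages: the run size recorded in the chunk map includes the extra padding page (large_pad), while the size reported to callers and handed to the tcache does not, which is why arena_salloc() and arena_sdalloc() now subtract large_pad. A minimal sketch, using a hypothetical helper name:

#include <assert.h>
#include <stddef.h>

#define PAGE       ((size_t)4096)  /* Assumed page size. */
#define large_pad  PAGE            /* Would be 0 with --disable-cache-oblivious. */

/* Hypothetical helper: chunk-map run size -> usable allocation size. */
static size_t
large_usable_size(size_t mapbits_size)
{
        /* Run sizes in the chunk map are always whole pages. */
        assert((mapbits_size & (PAGE - 1)) == 0);
        assert(mapbits_size > large_pad);
        return (mapbits_size - large_pad);
}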
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index b398f31..910ebf7 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -126,6 +126,13 @@ static const bool config_ivsalloc =
false
#endif
;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+ true
+#else
+ false
+#endif
+ ;
#ifdef JEMALLOC_C11ATOMICS
#include <stdatomic.h>
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index a943d23..ed8347a 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -193,6 +193,12 @@
#undef JEMALLOC_IVSALLOC
/*
+ * If defined, explicitly attempt to more uniformly distribute large allocation
+ * pointer alignments across all cache indices.
+ */
+#undef JEMALLOC_CACHE_OBLIVIOUS
+
+/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
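For context, a minimal sketch of how the new JEMALLOC_CACHE_OBLIVIOUS symbol is consumed; the configure changes are outside this include/-limited diff, but presumably the symbol is defined unless --disable-cache-oblivious is given, and the config_cache_oblivious constant added above lets C code branch on it without further preprocessor conditionals:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the definition added to jemalloc_internal.h.in above. */
static const bool config_cache_oblivious =
#ifdef JEMALLOC_CACHE_OBLIVIOUS
    true
#else
    false
#endif
    ;

int
main(void)
{
        /* A branch on a static const bool folds away at compile time. */
        if (config_cache_oblivious)
                printf("large allocations get a random cache-line offset\n");
        else
                printf("large allocations remain page-aligned\n");
        return (0);
}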
diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index c6b1797..216d0ef 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -26,22 +26,22 @@
* const uint32_t a, c : See above discussion.
*/
#define prng32(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 32); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 32); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (32 - lg_range); \
+ r >>= (32 - (lg_range)); \
} while (false)
/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
#define prng64(r, lg_range, state, a, c) do { \
- assert(lg_range > 0); \
- assert(lg_range <= 64); \
+ assert((lg_range) > 0); \
+ assert((lg_range) <= 64); \
\
r = (state * (a)) + (c); \
state = r; \
- r >>= (64 - lg_range); \
+ r >>= (64 - (lg_range)); \
} while (false)
#endif /* JEMALLOC_H_TYPES */
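The parentheses added around lg_range in prng32()/prng64() matter because callers may pass an expression rather than a plain constant. A self-contained sketch of the fixed prng32() (the LCG constants are placeholders, not jemalloc's documented parameters):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The fixed macro, as in the hunk above. */
#define prng32(r, lg_range, state, a, c) do {                           \
        assert((lg_range) > 0);                                         \
        assert((lg_range) <= 32);                                       \
                                                                        \
        r = (state * (a)) + (c);                                        \
        state = r;                                                      \
        r >>= (32 - (lg_range));                                        \
} while (false)

int
main(void)
{
        uint32_t state = 42, r;
        int lg_page = 12, lg_cacheline = 6;

        /*
         * Without the parentheses, the shift would expand to
         * 32 - 12 - 6 == 14 instead of 32 - (12 - 6) == 26, leaving 18
         * random bits rather than the intended 6.
         */
        prng32(r, lg_page - lg_cacheline, state, 1103515245U, 12345U);
        printf("r = %u (expected range 0..63)\n", (unsigned)r);
        return (0);
}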