author	Jason Evans <jasone@canonware.com>	2016-11-07 18:52:44 (GMT)
committer	Jason Evans <jasone@canonware.com>	2016-11-07 18:52:44 (GMT)
commit	04b463546e57ecd9ebc334739881a1c69623813a (patch)
tree	aab9e1d85e76d6cea1aa9dde7ea338c3b877f36c
parent	e0a9e78374f56bc7a27258ced08d89bfc436d8af (diff)
Refactor prng to not use 64-bit atomics on 32-bit platforms.
This resolves #495.
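
The refactor splits the old uint64_t-only API into _u32, _u64, and _zu (size_t) variants, and drops the atomic flag from the u64 flavor since 64-bit atomics cannot be assumed on 32-bit platforms. Below is a minimal, non-atomic sketch of the new size_t flavor; the PRNG_A/PRNG_C constants are the ones this commit adds, while the my_ names and the PTR_BITS plumbing are stand-ins for jemalloc's LG_SIZEOF_PTR/ZU() machinery:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#if UINTPTR_MAX == UINT32_MAX	/* 32-bit: stay at size_t width. */
#define	PRNG_A	((size_t)1103515241U)
#define	PRNG_C	((size_t)12347U)
#define	PTR_BITS	32
#else				/* 64-bit. */
#define	PRNG_A	((size_t)UINT64_C(6364136223846793005))
#define	PRNG_C	((size_t)UINT64_C(1442695040888963407))
#define	PTR_BITS	64
#endif

/* LCG step at pointer width, mirroring prng_state_next_zu(). */
static size_t
my_prng_state_next_zu(size_t state)
{
	return ((state * PRNG_A) + PRNG_C);
}

/* Non-atomic path of prng_lg_range_zu(): advance, then keep the upper bits. */
static size_t
my_prng_lg_range_zu(size_t *state, unsigned lg_range)
{
	assert(lg_range > 0 && lg_range <= PTR_BITS);
	*state = my_prng_state_next_zu(*state);
	return (*state >> (PTR_BITS - lg_range));
}

int
main(void)
{
	size_t state = 42;	/* Any seed works; tests use 42. */
	/* A 12-bit draw, akin to the page-offset randomization in extent.h. */
	printf("%zu\n", my_prng_lg_range_zu(&state, 12));
	return (0);
}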
-rw-r--r--	include/jemalloc/internal/arena.h	2
-rw-r--r--	include/jemalloc/internal/extent.h	4
-rw-r--r--	include/jemalloc/internal/private_symbols.txt	12
-rw-r--r--	include/jemalloc/internal/prng.h	143
-rw-r--r--	src/arena.c	6
-rw-r--r--	src/ckh.c	8
-rw-r--r--	src/prof.c	2
-rw-r--r--	test/unit/prng.c	209
8 files changed, 334 insertions(+), 52 deletions(-)
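
For context on why the generators return the upper bits: the comment in prng.h below observes that in an LCG with a power-of-two modulus, the lowest bit has a cycle of 2, the next a cycle of 4, and so on. A quick self-contained check of that claim, using the 64-bit constants added in this commit:

#include <stdint.h>
#include <stdio.h>

#define	PRNG_A_64	UINT64_C(6364136223846793005)
#define	PRNG_C_64	UINT64_C(1442695040888963407)

int
main(void)
{
	unsigned k;

	/*
	 * The low k bits of the next state depend only on the low k bits of
	 * the current state, so each low-bit slice forms a tiny LCG whose
	 * period we can measure directly.
	 */
	for (k = 1; k <= 8; k++) {
		uint64_t mask = (UINT64_C(1) << k) - 1;
		uint64_t start = 1 & mask, state = start;
		unsigned period = 0;

		do {
			state = ((state * PRNG_A_64) + PRNG_C_64) & mask;
			period++;
		} while (state != start);
		/* Prints "low 1 bit(s): period 2", "low 2 bit(s): period 4", ... */
		printf("low %u bit(s): period %u\n", k, period);
	}
	return (0);
}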
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index ce9d8b5..dbd334e 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -182,7 +182,7 @@ struct arena_s {
* PRNG state for cache index randomization of large allocation base
* pointers.
*/
- uint64_t offset_state;
+ size_t offset_state;
dss_prec_t dss_prec;
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
index 673cac2..531d853 100644
--- a/include/jemalloc/internal/extent.h
+++ b/include/jemalloc/internal/extent.h
@@ -325,8 +325,8 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment)
if (alignment < PAGE) {
unsigned lg_range = LG_PAGE -
lg_floor(CACHELINE_CEILING(alignment));
- uint64_t r =
- prng_lg_range(&extent_arena_get(extent)->offset_state,
+ size_t r =
+ prng_lg_range_zu(&extent_arena_get(extent)->offset_state,
lg_range, true);
uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
lg_range);
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 2949de1..f178daf 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -328,9 +328,15 @@ pind2sz_tab
pow2_ceil_u32
pow2_ceil_u64
pow2_ceil_zu
-prng_lg_range
-prng_range
-prng_state_next
+prng_lg_range_u32
+prng_lg_range_u64
+prng_lg_range_zu
+prng_range_u32
+prng_range_u64
+prng_range_zu
+prng_state_next_u32
+prng_state_next_u64
+prng_state_next_zu
prof_active
prof_active_get
prof_active_get_unlocked
diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index ebe916f..c2bda19 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -19,8 +19,12 @@
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*/
-#define PRNG_A UINT64_C(6364136223846793005)
-#define PRNG_C UINT64_C(1442695040888963407)
+
+#define PRNG_A_32 UINT32_C(1103515241)
+#define PRNG_C_32 UINT32_C(12347)
+
+#define PRNG_A_64 UINT64_C(6364136223846793005)
+#define PRNG_C_64 UINT64_C(1442695040888963407)
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
@@ -35,45 +39,133 @@
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
-uint64_t prng_state_next(uint64_t state);
-uint64_t prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic);
-uint64_t prng_range(uint64_t *state, uint64_t range, bool atomic);
+uint32_t prng_state_next_u32(uint32_t state);
+uint64_t prng_state_next_u64(uint64_t state);
+size_t prng_state_next_zu(size_t state);
+
+uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range,
+ bool atomic);
+uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range);
+size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
+
+uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
+uint64_t prng_range_u64(uint64_t *state, uint64_t range);
+size_t prng_range_zu(size_t *state, size_t range, bool atomic);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_state_next_u32(uint32_t state)
+{
+
+ return ((state * PRNG_A_32) + PRNG_C_32);
+}
+
JEMALLOC_ALWAYS_INLINE uint64_t
-prng_state_next(uint64_t state)
+prng_state_next_u64(uint64_t state)
+{
+
+ return ((state * PRNG_A_64) + PRNG_C_64);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_state_next_zu(size_t state)
+{
+
+#if LG_SIZEOF_PTR == 2
+ return ((state * PRNG_A_32) + PRNG_C_32);
+#elif LG_SIZEOF_PTR == 3
+ return ((state * PRNG_A_64) + PRNG_C_64);
+#else
+#error Unsupported pointer size
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
{
+ uint32_t ret, state1;
+
+ assert(lg_range > 0);
+ assert(lg_range <= 32);
+
+ if (atomic) {
+ uint32_t state0;
+
+ do {
+ state0 = atomic_read_uint32(state);
+ state1 = prng_state_next_u32(state0);
+ } while (atomic_cas_uint32(state, state0, state1));
+ } else {
+ state1 = prng_state_next_u32(*state);
+ *state = state1;
+ }
+ ret = state1 >> (32 - lg_range);
- return ((state * PRNG_A) + PRNG_C);
+ return (ret);
}
+/* 64-bit atomic operations cannot be supported on all relevant platforms. */
JEMALLOC_ALWAYS_INLINE uint64_t
-prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic)
+prng_lg_range_u64(uint64_t *state, unsigned lg_range)
{
uint64_t ret, state1;
assert(lg_range > 0);
assert(lg_range <= 64);
+ state1 = prng_state_next_u64(*state);
+ *state = state1;
+ ret = state1 >> (64 - lg_range);
+
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
+{
+ size_t ret, state1;
+
+ assert(lg_range > 0);
+ assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
+
if (atomic) {
- uint64_t state0;
+ size_t state0;
do {
- state0 = atomic_read_uint64(state);
- state1 = prng_state_next(state0);
- } while (atomic_cas_uint64(state, state0, state1));
+ state0 = atomic_read_z(state);
+ state1 = prng_state_next_zu(state0);
+ } while (atomic_cas_z(state, state0, state1));
} else {
- state1 = prng_state_next(*state);
+ state1 = prng_state_next_zu(*state);
*state = state1;
}
- ret = state1 >> (64 - lg_range);
+ ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
+{
+ uint32_t ret;
+ unsigned lg_range;
+
+ assert(range > 1);
+
+ /* Compute the ceiling of lg(range). */
+ lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+
+ /* Generate a result in [0..range) via repeated trial. */
+ do {
+ ret = prng_lg_range_u32(state, lg_range, atomic);
+ } while (ret >= range);
return (ret);
}
JEMALLOC_ALWAYS_INLINE uint64_t
-prng_range(uint64_t *state, uint64_t range, bool atomic)
+prng_range_u64(uint64_t *state, uint64_t range)
{
uint64_t ret;
unsigned lg_range;
@@ -85,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range, bool atomic)
/* Generate a result in [0..range) via repeated trial. */
do {
- ret = prng_lg_range(state, lg_range, atomic);
+ ret = prng_lg_range_u64(state, lg_range);
+ } while (ret >= range);
+
+ return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_range_zu(size_t *state, size_t range, bool atomic)
+{
+ size_t ret;
+ unsigned lg_range;
+
+ assert(range > 1);
+
+ /* Compute the ceiling of lg(range). */
+ lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+ /* Generate a result in [0..range) via repeated trial. */
+ do {
+ ret = prng_lg_range_zu(state, lg_range, atomic);
} while (ret >= range);
return (ret);
diff --git a/src/arena.c b/src/arena.c
index dd8e4d9..4b104a0 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -422,8 +422,8 @@ arena_decay_deadline_init(arena_t *arena)
if (arena->decay.time > 0) {
nstime_t jitter;
- nstime_init(&jitter, prng_range(&arena->decay.jitter_state,
- nstime_ns(&arena->decay.interval), false));
+ nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
+ nstime_ns(&arena->decay.interval)));
nstime_add(&arena->decay.deadline, &jitter);
}
}
@@ -1680,7 +1680,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
* deterministic seed.
*/
arena->offset_state = config_debug ? ind :
- (uint64_t)(uintptr_t)arena;
+ (size_t)(uintptr_t)arena;
}
arena->dss_prec = extent_dss_prec_get();
diff --git a/src/ckh.c b/src/ckh.c
index 7537601..6f16565 100644
--- a/src/ckh.c
+++ b/src/ckh.c
@@ -99,8 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
- offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS,
- false);
+ offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+ LG_CKH_BUCKET_CELLS);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -142,8 +142,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
* were an item for which both hashes indicated the same
* bucket.
*/
- i = (unsigned)prng_lg_range(&ckh->prng_state,
- LG_CKH_BUCKET_CELLS, false);
+ i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+ LG_CKH_BUCKET_CELLS);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
diff --git a/src/prof.c b/src/prof.c
index 4bafb39..19c8fb7 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -878,7 +878,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
* pp 500
* (http://luc.devroye.org/rnbookindex.html)
*/
- r = prng_lg_range(&tdata->prng_state, 53, false);
+ r = prng_lg_range_u64(&tdata->prng_state, 53);
u = (double)r * (1.0/9007199254740992.0L);
tdata->bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
diff --git a/test/unit/prng.c b/test/unit/prng.c
index f323445..111fa59 100644
--- a/test/unit/prng.c
+++ b/test/unit/prng.c
@@ -1,34 +1,71 @@
#include "test/jemalloc_test.h"
static void
-test_prng_lg_range(bool atomic)
+test_prng_lg_range_u32(bool atomic)
+{
+ uint32_t sa, sb, ra, rb;
+ unsigned lg_range;
+
+ sa = 42;
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ sa = 42;
+ rb = prng_lg_range_u32(&sa, 32, atomic);
+ assert_u32_eq(ra, rb,
+ "Repeated generation should produce repeated results");
+
+ sb = 42;
+ rb = prng_lg_range_u32(&sb, 32, atomic);
+ assert_u32_eq(ra, rb,
+ "Equivalent generation should produce equivalent results");
+
+ sa = 42;
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ rb = prng_lg_range_u32(&sa, 32, atomic);
+ assert_u32_ne(ra, rb,
+ "Full-width results must not immediately repeat");
+
+ sa = 42;
+ ra = prng_lg_range_u32(&sa, 32, atomic);
+ for (lg_range = 31; lg_range > 0; lg_range--) {
+ sb = 42;
+ rb = prng_lg_range_u32(&sb, lg_range, atomic);
+ assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)),
+ 0, "High order bits should be 0, lg_range=%u", lg_range);
+ assert_u32_eq(rb, (ra >> (32 - lg_range)),
+ "Expected high order bits of full-width result, "
+ "lg_range=%u", lg_range);
+ }
+}
+
+static void
+test_prng_lg_range_u64(void)
{
uint64_t sa, sb, ra, rb;
unsigned lg_range;
sa = 42;
- ra = prng_lg_range(&sa, 64, atomic);
+ ra = prng_lg_range_u64(&sa, 64);
sa = 42;
- rb = prng_lg_range(&sa, 64, atomic);
+ rb = prng_lg_range_u64(&sa, 64);
assert_u64_eq(ra, rb,
"Repeated generation should produce repeated results");
sb = 42;
- rb = prng_lg_range(&sb, 64, atomic);
+ rb = prng_lg_range_u64(&sb, 64);
assert_u64_eq(ra, rb,
"Equivalent generation should produce equivalent results");
sa = 42;
- ra = prng_lg_range(&sa, 64, atomic);
- rb = prng_lg_range(&sa, 64, atomic);
+ ra = prng_lg_range_u64(&sa, 64);
+ rb = prng_lg_range_u64(&sa, 64);
assert_u64_ne(ra, rb,
"Full-width results must not immediately repeat");
sa = 42;
- ra = prng_lg_range(&sa, 64, atomic);
+ ra = prng_lg_range_u64(&sa, 64);
for (lg_range = 63; lg_range > 0; lg_range--) {
sb = 42;
- rb = prng_lg_range(&sb, lg_range, atomic);
+ rb = prng_lg_range_u64(&sb, lg_range);
assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)),
0, "High order bits should be 0, lg_range=%u", lg_range);
assert_u64_eq(rb, (ra >> (64 - lg_range)),
@@ -37,22 +74,102 @@ test_prng_lg_range(bool atomic)
}
}
-TEST_BEGIN(test_prng_lg_range_nonatomic)
+static void
+test_prng_lg_range_zu(bool atomic)
+{
+	size_t sa, sb, ra, rb;
+ unsigned lg_range;
+
+ sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ sa = 42;
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_eq(ra, rb,
+ "Repeated generation should produce repeated results");
+
+ sb = 42;
+	rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_eq(ra, rb,
+ "Equivalent generation should produce equivalent results");
+
+ sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+	rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ assert_zu_ne(ra, rb,
+ "Full-width results must not immediately repeat");
+
+ sa = 42;
+	ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic);
+ for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0;
+ lg_range--) {
+ sb = 42;
+ rb = prng_lg_range_zu(&sb, lg_range, atomic);
+ assert_zu_eq((rb & (SIZE_T_MAX << lg_range)),
+ 0, "High order bits should be 0, lg_range=%u", lg_range);
+	assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)),
+ "Expected high order bits of full-width result, "
+ "lg_range=%u", lg_range);
+ }
+}
+
+TEST_BEGIN(test_prng_lg_range_u32_nonatomic)
{
- test_prng_lg_range(false);
+ test_prng_lg_range_u32(false);
}
TEST_END
-TEST_BEGIN(test_prng_lg_range_atomic)
+TEST_BEGIN(test_prng_lg_range_u32_atomic)
{
- test_prng_lg_range(true);
+ test_prng_lg_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_u64_nonatomic)
+{
+
+ test_prng_lg_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_nonatomic)
+{
+
+ test_prng_lg_range_zu(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_lg_range_zu_atomic)
+{
+
+ test_prng_lg_range_zu(true);
}
TEST_END
static void
-test_prng_range(bool atomic)
+test_prng_range_u32(bool atomic)
+{
+ uint32_t range;
+#define MAX_RANGE 10000000
+#define RANGE_STEP 97
+#define NREPS 10
+
+ for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+ uint32_t s;
+ unsigned rep;
+
+ s = range;
+ for (rep = 0; rep < NREPS; rep++) {
+ uint32_t r = prng_range_u32(&s, range, atomic);
+
+ assert_u32_lt(r, range, "Out of range");
+ }
+ }
+}
+
+static void
+test_prng_range_u64(void)
{
uint64_t range;
#define MAX_RANGE 10000000
@@ -65,24 +182,66 @@ test_prng_range(bool atomic)
s = range;
for (rep = 0; rep < NREPS; rep++) {
- uint64_t r = prng_range(&s, range, atomic);
+ uint64_t r = prng_range_u64(&s, range);
assert_u64_lt(r, range, "Out of range");
}
}
}
-TEST_BEGIN(test_prng_range_nonatomic)
+static void
+test_prng_range_zu(bool atomic)
+{
+ size_t range;
+#define MAX_RANGE 10000000
+#define RANGE_STEP 97
+#define NREPS 10
+
+ for (range = 2; range < MAX_RANGE; range += RANGE_STEP) {
+ size_t s;
+ unsigned rep;
+
+ s = range;
+ for (rep = 0; rep < NREPS; rep++) {
+ size_t r = prng_range_zu(&s, range, atomic);
+
+ assert_zu_lt(r, range, "Out of range");
+ }
+ }
+}
+
+TEST_BEGIN(test_prng_range_u32_nonatomic)
+{
+
+ test_prng_range_u32(false);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u32_atomic)
+{
+
+ test_prng_range_u32(true);
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_u64_nonatomic)
+{
+
+ test_prng_range_u64();
+}
+TEST_END
+
+TEST_BEGIN(test_prng_range_zu_nonatomic)
{
- test_prng_range(false);
+ test_prng_range_zu(false);
}
TEST_END
-TEST_BEGIN(test_prng_range_atomic)
+TEST_BEGIN(test_prng_range_zu_atomic)
{
- test_prng_range(true);
+ test_prng_range_zu(true);
}
TEST_END
@@ -91,8 +250,14 @@ main(void)
{
return (test(
- test_prng_lg_range_nonatomic,
- test_prng_lg_range_atomic,
- test_prng_range_nonatomic,
- test_prng_range_atomic));
+ test_prng_lg_range_u32_nonatomic,
+ test_prng_lg_range_u32_atomic,
+ test_prng_lg_range_u64_nonatomic,
+ test_prng_lg_range_zu_nonatomic,
+ test_prng_lg_range_zu_atomic,
+ test_prng_range_u32_nonatomic,
+ test_prng_range_u32_atomic,
+ test_prng_range_u64_nonatomic,
+ test_prng_range_zu_nonatomic,
+ test_prng_range_zu_atomic));
}