summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Goldblatt <davidgoldblatt@fb.com>2017-12-20 01:30:50 (GMT)
committerDavid Goldblatt <davidtgoldblatt@gmail.com>2017-12-21 22:25:43 (GMT)
commit21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b (patch)
tree0b57b8ba7a9946d599adc21070f58983ce59d1f3
parent7f1b02e3fa9de7e0bb5e2562994b5ab3b82c0ec3 (diff)
downloadjemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.zip
jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.gz
jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.bz2
Add the div module, which allows fast division by dynamic values.
-rw-r--r--Makefile.in2
-rw-r--r--include/jemalloc/internal/div.h41
-rw-r--r--src/div.c55
-rw-r--r--src/sz.c3
-rw-r--r--test/unit/div.c29
5 files changed, 129 insertions, 1 deletions
diff --git a/Makefile.in b/Makefile.in
index 2f0b3b2..96c4ae0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/bitmap.c \
$(srcroot)src/ckh.c \
$(srcroot)src/ctl.c \
+ $(srcroot)src/div.c \
$(srcroot)src/extent.c \
$(srcroot)src/extent_dss.c \
$(srcroot)src/extent_mmap.c \
@@ -165,6 +166,7 @@ TESTS_UNIT := \
$(srcroot)test/unit/bitmap.c \
$(srcroot)test/unit/ckh.c \
$(srcroot)test/unit/decay.c \
+ $(srcroot)test/unit/div.c \
$(srcroot)test/unit/extent_quantize.c \
$(srcroot)test/unit/fork.c \
$(srcroot)test/unit/hash.c \
diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h
new file mode 100644
index 0000000..aebae93
--- /dev/null
+++ b/include/jemalloc/internal/div.h
@@ -0,0 +1,41 @@
+#ifndef JEMALLOC_INTERNAL_DIV_H
+#define JEMALLOC_INTERNAL_DIV_H
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * This module does the division that computes the index of a region in a slab,
+ * given its offset relative to the base.
+ * That is, given a divisor d, an n = i * d (all integers), we'll return i.
+ * We do some pre-computation to do this more quickly than a CPU division
+ * instruction.
+ * We bound n < 2^32, and don't support dividing by one.
+ */
+
+typedef struct div_info_s div_info_t;
+struct div_info_s {
+ uint32_t magic;
+#ifdef JEMALLOC_DEBUG
+ size_t d;
+#endif
+};
+
+void div_init(div_info_t *div_info, size_t divisor);
+
+static inline size_t
+div_compute(div_info_t *div_info, size_t n) {
+ assert(n <= (uint32_t)-1);
+ /*
+ * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine,
+ * the compilers I tried were all smart enough to turn this into the
+ * appropriate "get the high 32 bits of the result of a multiply" (e.g.
+ * mul; mov edx eax; on x86, umull on arm, etc.).
+ */
+ size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32;
+#ifdef JEMALLOC_DEBUG
+ assert(i * div_info->d == n);
+#endif
+ return i;
+}
+
+#endif /* JEMALLOC_INTERNAL_DIV_H */
diff --git a/src/div.c b/src/div.c
new file mode 100644
index 0000000..808892a
--- /dev/null
+++ b/src/div.c
@@ -0,0 +1,55 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/div.h"
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
+ *
+ * For any k, we have (here, all division is exact; not C-style rounding):
+ * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
+ * r = (-2^k) mod d.
+ *
+ * Expanding this out:
+ * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
+ * = floor(n / d + (r / d) * (n / 2^k)).
+ *
+ * The fractional part of n / d is 0 (because of the assumption that d divides n
+ * exactly), so we have:
+ * ... = n / d + floor((r / d) * (n / 2^k))
+ *
+ * So that our initial expression is equal to the quantity we seek, so long as
+ * (r / d) * (n / 2^k) < 1.
+ *
+ * r is a remainder mod d, so r < d and r / d < 1 always. We can make
+ * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works.
+ */
+
+void
+div_init(div_info_t *div_info, size_t d) {
+ /* Nonsensical. */
+ assert(d != 0);
+ /*
+ * This would make the value of magic too high to fit into a uint32_t
+ * (we would want magic = 2^32 exactly). This would mess with code gen
+ * on 32-bit machines.
+ */
+ assert(d != 1);
+
+ uint64_t two_to_k = ((uint64_t)1 << 32);
+ uint32_t magic = (uint32_t)(two_to_k / d);
+
+ /*
+ * We want magic = ceil(2^k / d), but C gives us floor. We have to
+ * increment it unless the result was exact (i.e. unless d is a power of
+ * two).
+ */
+ if (two_to_k % d != 0) {
+ magic++;
+ }
+ div_info->magic = magic;
+#ifdef JEMALLOC_DEBUG
+ div_info->d = d;
+#endif
+}
diff --git a/src/sz.c b/src/sz.c
index 0986615..9de77e4 100644
--- a/src/sz.c
+++ b/src/sz.c
@@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = {
JEMALLOC_ALIGNED(CACHELINE)
const uint8_t sz_size2index_tab[] = {
#if LG_TINY_MIN == 0
-#warning "Dangerous LG_TINY_MIN"
+/* The div module doesn't support division by 1. */
+#error "Unsupported LG_TINY_MIN"
#define S2B_0(i) i,
#elif LG_TINY_MIN == 1
#warning "Dangerous LG_TINY_MIN"
diff --git a/test/unit/div.c b/test/unit/div.c
new file mode 100644
index 0000000..b47f10b
--- /dev/null
+++ b/test/unit/div.c
@@ -0,0 +1,29 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/div.h"
+
+TEST_BEGIN(test_div_exhaustive) {
+ for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) {
+ div_info_t div_info;
+ div_init(&div_info, divisor);
+ size_t max = 1000 * divisor;
+ if (max < 1000 * 1000) {
+ max = 1000 * 1000;
+ }
+ for (size_t dividend = 0; dividend < 1000 * divisor;
+ dividend += divisor) {
+ size_t quotient = div_compute(
+ &div_info, dividend);
+ assert_zu_eq(dividend, quotient * divisor,
+ "With divisor = %zu, dividend = %zu, "
+ "got quotient %zu", divisor, dividend, quotient);
+ }
+ }
+}
+TEST_END
+
+int
+main(void) {
+ return test_no_reentrancy(
+ test_div_exhaustive);
+}