Add the div module, which allows fast division by dynamic values.

author: David Goldblatt <davidgoldblatt@fb.com> 2017-12-20 01:30:50 (GMT)
committer: David Goldblatt <davidtgoldblatt@gmail.com> 2017-12-21 22:25:43 (GMT)
commit: 21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b (patch)
tree: 0b57b8ba7a9946d599adc21070f58983ce59d1f3
parent: 7f1b02e3fa9de7e0bb5e2562994b5ab3b82c0ec3 (diff)
download: jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.zip
jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.gz
jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.bz2
5 files changed, 129 insertions, 1 deletions
diff --git a/Makefile.in b/Makefile.in
index 2f0b3b2..96c4ae0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \
 	$(srcroot)src/bitmap.c \
 	$(srcroot)src/ckh.c \
 	$(srcroot)src/ctl.c \
+	$(srcroot)src/div.c \
 	$(srcroot)src/extent.c \
 	$(srcroot)src/extent_dss.c \
 	$(srcroot)src/extent_mmap.c \
@@ -165,6 +166,7 @@ TESTS_UNIT := \
 	$(srcroot)test/unit/bitmap.c \
 	$(srcroot)test/unit/ckh.c \
 	$(srcroot)test/unit/decay.c \
+	$(srcroot)test/unit/div.c \
 	$(srcroot)test/unit/extent_quantize.c \
 	$(srcroot)test/unit/fork.c \
 	$(srcroot)test/unit/hash.c \
diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h
new file mode 100644
index 0000000..aebae93
--- /dev/null
+++ b/include/jemalloc/internal/div.h
@@ -0,0 +1,41 @@
+#ifndef JEMALLOC_INTERNAL_DIV_H
+#define JEMALLOC_INTERNAL_DIV_H
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * This module does the division that computes the index of a region in a slab,
+ * given its offset relative to the base.
+ * That is, given a divisor d, an n = i * d (all integers), we'll return i.
+ * We do some pre-computation to do this more quickly than a CPU division
+ * instruction.
+ * We bound n < 2^32, and don't support dividing by one.
+ */
+
+typedef struct div_info_s div_info_t;
+struct div_info_s {
+	uint32_t magic;
+#ifdef JEMALLOC_DEBUG
+	size_t d;
+#endif
+};
+
+void div_init(div_info_t *div_info, size_t divisor);
+
+static inline size_t
+div_compute(div_info_t *div_info, size_t n) {
+	assert(n <= (uint32_t)-1);
+	/*
+	 * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine,
+	 * the compilers I tried were all smart enough to turn this into the
+	 * appropriate "get the high 32 bits of the result of a multiply" (e.g.
+	 * mul; mov edx eax; on x86, umull on arm, etc.).
+	 */
+	size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32;
+#ifdef JEMALLOC_DEBUG
+	assert(i * div_info->d == n);
+#endif
+	return i;
+}
+
+#endif /* JEMALLOC_INTERNAL_DIV_H */
diff --git a/src/div.c b/src/div.c
new file mode 100644
index 0000000..808892a
--- /dev/null
+++ b/src/div.c
@@ -0,0 +1,55 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/div.h"
+
+#include "jemalloc/internal/assert.h"
+
+/*
+ * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
+ *
+ * For any k, we have (here, all division is exact; not C-style rounding):
+ * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
+ * r = (-2^k) mod d.
+ *
+ * Expanding this out:
+ * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
+ *     = floor(n / d + (r / d) * (n / 2^k)).
+ *
+ * The fractional part of n / d is 0 (because of the assumption that d divides n
+ * exactly), so we have:
+ * ... = n / d + floor((r / d) * (n / 2^k))
+ *
+ * So that our initial expression is equal to the quantity we seek, so long as
+ * (r / d) * (n / 2^k) < 1.
+ *
+ * r is a remainder mod d, so r < d and r / d < 1 always. We can make
+ * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works.
+ */
+
+void
+div_init(div_info_t *div_info, size_t d) {
+	/* Nonsensical. */
+	assert(d != 0);
+	/*
+	 * This would make the value of magic too high to fit into a uint32_t
+	 * (we would want magic = 2^32 exactly). This would mess with code gen
+	 * on 32-bit machines.
+	 */
+	assert(d != 1);
+
+	uint64_t two_to_k = ((uint64_t)1 << 32);
+	uint32_t magic = (uint32_t)(two_to_k / d);
+
+	/*
+	 * We want magic = ceil(2^k / d), but C gives us floor. We have to
+	 * increment it unless the result was exact (i.e. unless d is a power of
+	 * two).
+	 */
+	if (two_to_k % d != 0) {
+		magic++;
+	}
+	div_info->magic = magic;
+#ifdef JEMALLOC_DEBUG
+	div_info->d = d;
+#endif
+}
diff --git a/src/sz.c b/src/sz.c
index 0986615..9de77e4 100644
--- a/src/sz.c
+++ b/src/sz.c
@@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = {
 JEMALLOC_ALIGNED(CACHELINE)
 const uint8_t sz_size2index_tab[] = {
 #if LG_TINY_MIN == 0
-#warning "Dangerous LG_TINY_MIN"
+/* The div module doesn't support division by 1. */
+#error "Unsupported LG_TINY_MIN"
 #define S2B_0(i)	i,
 #elif LG_TINY_MIN == 1
 #warning "Dangerous LG_TINY_MIN"
diff --git a/test/unit/div.c b/test/unit/div.c
new file mode 100644
index 0000000..b47f10b
--- /dev/null
+++ b/test/unit/div.c
@@ -0,0 +1,29 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/div.h"
+
+TEST_BEGIN(test_div_exhaustive) {
+	for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) {
+		div_info_t div_info;
+		div_init(&div_info, divisor);
+		size_t max = 1000 * divisor;
+		if (max < 1000 * 1000) {
+			max = 1000 * 1000;
+		}
+		for (size_t dividend = 0; dividend < 1000 * divisor;
+		    dividend += divisor) {
+			size_t quotient = div_compute(
+			    &div_info, dividend);
+			assert_zu_eq(dividend, quotient * divisor,
+			    "With divisor = %zu, dividend = %zu, "
+			    "got quotient %zu", divisor, dividend, quotient);
+		}
+	}
+}
+TEST_END
+
+int
+main(void) {
+	return test_no_reentrancy(
+	    test_div_exhaustive);
+}
author	David Goldblatt <davidgoldblatt@fb.com>	2017-12-20 01:30:50 (GMT)
committer	David Goldblatt <davidtgoldblatt@gmail.com>	2017-12-21 22:25:43 (GMT)
commit	21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b (patch)
tree	0b57b8ba7a9946d599adc21070f58983ce59d1f3
parent	7f1b02e3fa9de7e0bb5e2562994b5ab3b82c0ec3 (diff)
download	jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.zip jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.gz jemalloc-21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b.tar.bz2