summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Raymond Hettinger <python@rcn.com> 2013-07-28 09:39:49 (GMT)
committer: Raymond Hettinger <python@rcn.com> 2013-07-28 09:39:49 (GMT)
commit: 77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c (patch)
tree: 303daae3dfcb80a883facdae7d593e3e099779c3
parent: 1f1d0a57fad17fb0fb1e1a44b1a38be17ea9976e (diff)
download: cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.zip
cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.tar.gz
cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.tar.bz2
Restore the data block size to 62.
The former block size traded away good fit within cache lines in order to gain faster division in deque_item(). However, compilers are getting smarter and can now replace the slow division operation with a fast integer multiply and right shift. Accordingly, it makes sense to go back to a size that lets blocks neatly fill entire cache-lines. GCC-4.8 and CLANG 4.0 both compute "x // 62" with something roughly equivalent to "x * 9520900167075897609 >> 69".
-rw-r--r-- Lib/test/test_deque.py 2
-rw-r--r-- Modules/_collectionsmodule.c 9
2 files changed, 7 insertions, 4 deletions
diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py
index ae1de9a..7bff1d2 100644
--- a/Lib/test/test_deque.py
+++ b/Lib/test/test_deque.py
@@ -536,7 +536,7 @@ class TestBasic(unittest.TestCase):
@support.cpython_only
def test_sizeof(self):
- BLOCKLEN = 64
+ BLOCKLEN = 62
basesize = support.calcobjsize('2P4nlP')
blocksize = struct.calcsize('2P%dP' % BLOCKLEN)
self.assertEqual(object.__sizeof__(deque()), basesize)
diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c
index 1f583b8..e5dfdb4 100644
--- a/Modules/_collectionsmodule.c
+++ b/Modules/_collectionsmodule.c
@@ -10,11 +10,14 @@
/* The block length may be set to any number over 1. Larger numbers
* reduce the number of calls to the memory allocator, give faster
* indexing and rotation, and reduce the link::data overhead ratio.
- * If the block length is a power-of-two, we also get faster
- * division/modulo computations during indexing.
+ *
+ * Ideally, the block length will be set to two less than some
+ * multiple of the cache-line length (so that the full block
+ * including the leftlink and rightlink will fit neatly into
+ * cache lines).
*/
-#define BLOCKLEN 64
+#define BLOCKLEN 62
#define CENTER ((BLOCKLEN - 1) / 2)
/* A `dequeobject` is composed of a doubly-linked list of `block` nodes.