diff options
author | Raymond Hettinger <python@rcn.com> | 2013-07-28 09:39:49 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2013-07-28 09:39:49 (GMT) |
commit | 77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c (patch) | |
tree | 303daae3dfcb80a883facdae7d593e3e099779c3 | |
parent | 1f1d0a57fad17fb0fb1e1a44b1a38be17ea9976e (diff) | |
download | cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.zip cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.tar.gz cpython-77578204d6aeb89a9ee8365f8fb28ce18aa2eb7c.tar.bz2 |
Restore the data block size to 62.

The former block size traded away good fit within cache lines in
order to gain faster division in deque_item(). However, compilers
are getting smarter and can now replace the slow division operation
with a fast integer multiply and right shift. Accordingly, it makes
sense to go back to a size that lets blocks neatly fill entire
cache-lines.

GCC-4.8 and CLANG 4.0 both compute "x // 62" with something
roughly equivalent to "x * 9520900167075897609 >> 69".
-rw-r--r-- | Lib/test/test_deque.py | 2 | ||||
-rw-r--r-- | Modules/_collectionsmodule.c | 9 |
2 files changed, 7 insertions, 4 deletions
diff --git a/Lib/test/test_deque.py b/Lib/test/test_deque.py
index ae1de9a..7bff1d2 100644
--- a/Lib/test/test_deque.py
+++ b/Lib/test/test_deque.py
@@ -536,7 +536,7 @@ class TestBasic(unittest.TestCase):
 
     @support.cpython_only
     def test_sizeof(self):
-        BLOCKLEN = 64
+        BLOCKLEN = 62
         basesize = support.calcobjsize('2P4nlP')
         blocksize = struct.calcsize('2P%dP' % BLOCKLEN)
         self.assertEqual(object.__sizeof__(deque()), basesize)
diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c
index 1f583b8..e5dfdb4 100644
--- a/Modules/_collectionsmodule.c
+++ b/Modules/_collectionsmodule.c
@@ -10,11 +10,14 @@
 /* The block length may be set to any number over 1. Larger numbers
  * reduce the number of calls to the memory allocator, give faster
  * indexing and rotation, and reduce the link::data overhead ratio.
- * If the block length is a power-of-two, we also get faster
- * division/modulo computations during indexing.
+ *
+ * Ideally, the block length will be set to two less than some
+ * multiple of the cache-line length (so that the full block
+ * including the leftlink and rightlink will fit neatly into
+ * cache lines).
  */
 
-#define BLOCKLEN 64
+#define BLOCKLEN 62
 #define CENTER ((BLOCKLEN - 1) / 2)
 
 /* A `dequeobject` is composed of a doubly-linked list of `block` nodes.