diff options
author | Raymond Hettinger <python@rcn.com> | 2004-06-01 06:36:24 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2004-06-01 06:36:24 (GMT) |
commit | 41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc (patch) | |
tree | 7c27849dbc8f686cde8f37f55002ed4b13255107 | |
parent | 504239fb38da8526bb48cf8b0778fe47e34554e5 (diff) | |
download | cpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.zip cpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.tar.gz cpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.tar.bz2 |
SF bug #942952: Weakness in tuple hash
(Basic approach and test concept by Tim Peters.)
* Improved the hash to reduce collisions.
* Added the torture test to the test suite.
-rw-r--r-- | Lib/test/test_tuple.py | 19 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/tupleobject.c | 5 |
3 files changed, 25 insertions, 2 deletions
diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 4fe299f..a3f40dd 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -41,6 +41,25 @@ class TupleTest(seq_tests.CommonTest): yield i self.assertEqual(list(tuple(f())), range(1000)) + def test_hash(self): + # See SF bug 942952: Weakness in tuple hash + # The hash should: + # be non-commutative + # should spread-out closely spaced values + # should not exhibit cancellation in tuples like (x,(x,y)) + # should be distinct from element hashes: hash(x)!=hash((x,)) + # This test exercises those cases. + # For a pure random hash and N=50, the expected number of collisions + # is 7.3. Here we allow twice that number. + # Any worse and the hash function is sorely suspect. + + N=50 + base = range(N) + xp = [(i, j) for i in base for j in base] + inps = base + [(i, j) for i in base for j in xp] + \ + [(i, j) for i in xp for j in base] + xp + zip(base) + collisions = len(inps) - len(set(map(hash, inps))) + self.assert_(collisions <= 15) def test_main(): test_support.run_unittest(TupleTest) @@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1? Core and builtins ----------------- +- Improved the tuple hashing algorithm to give fewer collisions in + common cases. Fixes bug #942952. + - Implemented generator expressions (PEP 289). Coded by Jiwon Seo. - Enabled the profiling of C extension functions (and builtins) - check diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 159dc44..4cb80f0 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v) register long x, y; register int len = v->ob_size; register PyObject **p; + long mult = 1000003L; x = 0x345678L; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); if (y == -1) return -1; - x = (1000003*x) ^ y; + x = (x ^ y) * mult; + mult += 69068L + len + len; } - x ^= v->ob_size; if (x == -1) x = -2; return x; |