summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2004-06-01 06:36:24 (GMT)
committerRaymond Hettinger <python@rcn.com>2004-06-01 06:36:24 (GMT)
commit41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc (patch)
tree7c27849dbc8f686cde8f37f55002ed4b13255107
parent504239fb38da8526bb48cf8b0778fe47e34554e5 (diff)
downloadcpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.zip
cpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.tar.gz
cpython-41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc.tar.bz2
SF bug #942952: Weakness in tuple hash
(Basic approach and test concept by Tim Peters.) * Improved the hash to reduce collisions. * Added the torture test to the test suite.
-rw-r--r--Lib/test/test_tuple.py19
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/tupleobject.c5
3 files changed, 25 insertions, 2 deletions
diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py
index 4fe299f..a3f40dd 100644
--- a/Lib/test/test_tuple.py
+++ b/Lib/test/test_tuple.py
@@ -41,6 +41,25 @@ class TupleTest(seq_tests.CommonTest):
yield i
self.assertEqual(list(tuple(f())), range(1000))
+ def test_hash(self):
+ # See SF bug 942952: Weakness in tuple hash
+ # The hash should:
+ # be non-commutative
+ # should spread-out closely spaced values
+ # should not exhibit cancellation in tuples like (x,(x,y))
+ # should be distinct from element hashes: hash(x)!=hash((x,))
+ # This test exercises those cases.
+ # For a pure random hash and N=50, the expected number of collisions
+ # is 7.3. Here we allow twice that number.
+ # Any worse and the hash function is sorely suspect.
+
+ N=50
+ base = range(N)
+ xp = [(i, j) for i in base for j in base]
+ inps = base + [(i, j) for i in base for j in xp] + \
+ [(i, j) for i in xp for j in base] + xp + zip(base)
+ collisions = len(inps) - len(set(map(hash, inps)))
+ self.assert_(collisions <= 15)
def test_main():
test_support.run_unittest(TupleTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index ab705af..2c5dfd9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
+- Improved the tuple hashing algorithm to give fewer collisions in
+ common cases. Fixes bug #942952.
+
- Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
- Enabled the profiling of C extension functions (and builtins) - check
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 159dc44..4cb80f0 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v)
register long x, y;
register int len = v->ob_size;
register PyObject **p;
+ long mult = 1000003L;
x = 0x345678L;
p = v->ob_item;
while (--len >= 0) {
y = PyObject_Hash(*p++);
if (y == -1)
return -1;
- x = (1000003*x) ^ y;
+ x = (x ^ y) * mult;
+ mult += 69068L + len + len;
}
- x ^= v->ob_size;
if (x == -1)
x = -2;
return x;