summaryrefslogtreecommitdiffstats
path: root/Objects/setobject.c
diff options
context:
space:
mode:
authorRaymond Hettinger <python@rcn.com>2014-01-05 20:00:31 (GMT)
committerRaymond Hettinger <python@rcn.com>2014-01-05 20:00:31 (GMT)
commit74fc8c47f66da5673ed733d21e9014e9508f2819 (patch)
tree82d852bc1dd691fe5f3145eacf47aeb3aa64a7f1 /Objects/setobject.c
parentdf3ed242c07ac9d0d46faa89a4705bd6acb3dd68 (diff)
downloadcpython-74fc8c47f66da5673ed733d21e9014e9508f2819.zip
cpython-74fc8c47f66da5673ed733d21e9014e9508f2819.tar.gz
cpython-74fc8c47f66da5673ed733d21e9014e9508f2819.tar.bz2
Add comments to frozenset_hash().
Also, provide a minor hint to the compiler on how to group the xors.
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r--Objects/setobject.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index b0803f6..fa6a6d0 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -738,6 +738,17 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)
static Py_hash_t
frozenset_hash(PyObject *self)
{
+ /* Most of the constants in this hash algorithm are randomly choosen
+ large primes with "interesting bit patterns" and that passed
+ tests for good collision statistics on a variety of problematic
+ datasets such as:
+
+ ps = []
+ for r in range(21):
+ ps += itertools.combinations(range(20), r)
+ num_distinct_hashes = len({hash(frozenset(s)) for s in ps})
+
+ */
PySetObject *so = (PySetObject *)self;
Py_uhash_t h, hash = 1927868237UL;
setentry *entry;
@@ -754,8 +765,10 @@ frozenset_hash(PyObject *self)
hashes so that many distinct combinations collapse to only
a handful of distinct hash values. */
h = entry->hash;
- hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL;
+ hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
}
+ /* Make the final result spread-out in a different pattern
+ than the algorithem for tuples or other python objects. */
hash = hash * 69069U + 907133923UL;
if (hash == -1)
hash = 590923713UL;