author     Raymond Hettinger <python@rcn.com>    2014-01-05 20:00:31 (GMT)
committer  Raymond Hettinger <python@rcn.com>    2014-01-05 20:00:31 (GMT)
commit     74fc8c47f66da5673ed733d21e9014e9508f2819
tree       82d852bc1dd691fe5f3145eacf47aeb3aa64a7f1 /Objects
parent     df3ed242c07ac9d0d46faa89a4705bd6acb3dd68
Add comments to frozenset_hash().
Also, provide a minor hint to the compiler on how to group the xors.
Diffstat (limited to 'Objects')
-rw-r--r--  Objects/setobject.c  15
1 file changed, 14 insertions, 1 deletion
diff --git a/Objects/setobject.c b/Objects/setobject.c
index b0803f6..fa6a6d0 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -738,6 +738,17 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)
 static Py_hash_t
 frozenset_hash(PyObject *self)
 {
+    /* Most of the constants in this hash algorithm are randomly chosen
+       large primes with "interesting bit patterns" that passed
+       tests for good collision statistics on a variety of problematic
+       datasets such as:
+
+          ps = []
+          for r in range(21):
+              ps += itertools.combinations(range(20), r)
+          num_distinct_hashes = len({hash(frozenset(s)) for s in ps})
+
+    */
     PySetObject *so = (PySetObject *)self;
     Py_uhash_t h, hash = 1927868237UL;
     setentry *entry;
@@ -754,8 +765,10 @@ frozenset_hash(PyObject *self)
            hashes so that many distinct combinations collapse to only
            a handful of distinct hash values. */
         h = entry->hash;
-        hash ^= (h ^ (h << 16) ^ 89869747UL) * 3644798167UL;
+        hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
     }
+    /* Make the final result spread out in a different pattern
+       than the algorithm for tuples or other Python objects. */
     hash = hash * 69069U + 907133923UL;
     if (hash == -1)
         hash = 590923713UL;
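For reference, the collision test sketched in the new comment can be run as a standalone Python script. This is a minimal sketch built only from the snippet embedded in the comment; the final print line is an addition here, not part of the committed text. It enumerates every subset of range(20) (2**20 frozensets in total) and counts how many distinct hash values they produce.

    import itertools

    # Build every subset of range(20), sizes 0 through 20 (2**20 subsets total).
    ps = []
    for r in range(21):
        ps += itertools.combinations(range(20), r)

    # Hash each subset as a frozenset and count the distinct hash values;
    # a well-spread hash keeps this count close to len(ps).
    num_distinct_hashes = len({hash(frozenset(s)) for s in ps})

    # Reporting line added for this sketch (not in the original comment).
    print(len(ps), "subsets ->", num_distinct_hashes, "distinct hashes")

A run that reports far fewer distinct hashes than subsets would indicate the kind of collision collapse the surrounding comment warns about.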