diff options
author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2021-04-22 15:34:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-22 15:34:57 (GMT) |
commit | a07da09ad5bd7d234ccd084a3a0933c290d1b592 (patch) | |
tree | 8c1ab67575527bd5c0c9452a74458ad5a29a1d08 /Python | |
parent | accea7dc2bd30a6e8e1b0334acfca9585cbd7f8a (diff) | |
download | cpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.zip cpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.gz cpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.bz2 |
bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/pyhash.c | 14 | ||||
-rw-r--r-- | Python/sysmodule.c | 2 |
2 files changed, 11 insertions, 5 deletions
diff --git a/Python/pyhash.c b/Python/pyhash.c
index 3b6c34e..f0c8235 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
    If the result of the reduction is infinity (this is impossible for
    integers, floats and Decimals) then use the predefined hash value
    _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
-   _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
-   hashes of float and Decimal infinities and nans.
+   _PyHASH_INF and -_PyHASH_INF are also used for the
+   hashes of float and Decimal infinities.
+
+   NaNs hash with a pointer hash. Having distinct hash values prevents
+   catastrophic pileups from distinct NaN instances which used to always
+   have the same hash value but would compare unequal.
 
    A selling point for the above strategy is that it makes it possible
    to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
 
    */
 
+Py_hash_t _Py_HashPointer(const void *);
+
 Py_hash_t
-_Py_HashDouble(double v)
+_Py_HashDouble(PyObject *inst, double v)
 {
     int e, sign;
     double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
         if (Py_IS_INFINITY(v))
             return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
         else
-            return _PyHASH_NAN;
+            return _Py_HashPointer(inst);
     }
 
     m = frexp(v, &e);
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index a36d90f..911c2d9 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
     PyStructSequence_SET_ITEM(hash_info, field++,
                               PyLong_FromLong(_PyHASH_INF));
     PyStructSequence_SET_ITEM(hash_info, field++,
-                              PyLong_FromLong(_PyHASH_NAN));
+                              PyLong_FromLong(0)); // This is no longer used
     PyStructSequence_SET_ITEM(hash_info, field++,
                               PyLong_FromLong(_PyHASH_IMAG));
     PyStructSequence_SET_ITEM(hash_info, field++,