summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2021-04-22 15:34:57 (GMT)
committerGitHub <noreply@github.com>2021-04-22 15:34:57 (GMT)
commita07da09ad5bd7d234ccd084a3a0933c290d1b592 (patch)
tree8c1ab67575527bd5c0c9452a74458ad5a29a1d08
parentaccea7dc2bd30a6e8e1b0334acfca9585cbd7f8a (diff)
downloadcpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.zip
cpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.gz
cpython-a07da09ad5bd7d234ccd084a3a0933c290d1b592.tar.bz2
bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
-rw-r--r--Doc/library/stdtypes.rst9
-rw-r--r--Doc/library/sys.rst2
-rw-r--r--Include/pyhash.h3
-rw-r--r--Lib/_pydecimal.py2
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-04-20-20-10-46.bpo-43475.oV8Mbs.rst3
-rw-r--r--Modules/_decimal/_decimal.c5
-rw-r--r--Objects/complexobject.c4
-rw-r--r--Objects/floatobject.c2
-rw-r--r--Python/pyhash.c14
-rw-r--r--Python/sysmodule.c2
10 files changed, 25 insertions, 21 deletions
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 68b6050..b83d0d8 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -692,10 +692,9 @@ Here are the rules in detail:
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
``-2``.
-- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
- and ``sys.hash_info.nan`` are used as hash values for positive
- infinity, negative infinity, or nans (respectively). (All hashable
- nans have the same hash value.)
+- The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf``
+ are used as hash values for positive
+ infinity or negative infinity (respectively).
- For a :class:`complex` number ``z``, the hash values of the real
and imaginary parts are combined by computing ``hash(z.real) +
@@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`::
"""Compute the hash of a float x."""
if math.isnan(x):
- return sys.hash_info.nan
+ return super().__hash__()
elif math.isinf(x):
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
else:
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index e431d1b..fe1cca1 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -855,7 +855,7 @@ always available.
+---------------------+--------------------------------------------------+
| :const:`inf` | hash value returned for a positive infinity |
+---------------------+--------------------------------------------------+
- | :const:`nan` | hash value returned for a nan |
+ | :const:`nan` | (this attribute is no longer used) |
+---------------------+--------------------------------------------------+
| :const:`imag` | multiplier used for the imaginary part of a |
| | complex number |
diff --git a/Include/pyhash.h b/Include/pyhash.h
index 4437b87..728ef93 100644
--- a/Include/pyhash.h
+++ b/Include/pyhash.h
@@ -7,7 +7,7 @@ extern "C" {
/* Helpers for hash functions */
#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
+PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but don't replace -1 with -2
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
@@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
#define _PyHASH_INF 314159
-#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py
index ab989e5..ff23322 100644
--- a/Lib/_pydecimal.py
+++ b/Lib/_pydecimal.py
@@ -951,7 +951,7 @@ class Decimal(object):
if self.is_snan():
raise TypeError('Cannot hash a signaling NaN value.')
elif self.is_nan():
- return _PyHASH_NAN
+ return super().__hash__()
else:
if self._sign:
return -_PyHASH_INF
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-20-20-10-46.bpo-43475.oV8Mbs.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-20-20-10-46.bpo-43475.oV8Mbs.rst
new file mode 100644
index 0000000..73ed022
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-20-20-10-46.bpo-43475.oV8Mbs.rst
@@ -0,0 +1,3 @@
+Hashes of NaN values now depend on object identity. Formerly, they always
+hashed to 0 even though NaN values are not equal to one another. Having the
+same hash for unequal values caused pile-ups in hash tables.
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
index 9a4329f..9b89fa4 100644
--- a/Modules/_decimal/_decimal.c
+++ b/Modules/_decimal/_decimal.c
@@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v)
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
#endif
const Py_hash_t py_hash_inf = 314159;
- const Py_hash_t py_hash_nan = 0;
mpd_uint_t ten_data[1] = {10};
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
0, 2, 1, 1, ten_data};
@@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v)
return -1;
}
else if (mpd_isnan(MPD(v))) {
- return py_hash_nan;
+ return _Py_HashPointer(v);
}
else {
return py_hash_inf * mpd_arith_sign(MPD(v));
@@ -5939,5 +5938,3 @@ error:
return NULL; /* GCOV_NOT_REACHED */
}
-
-
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index a65ebdf..91e06a8 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -412,10 +412,10 @@ static Py_hash_t
complex_hash(PyComplexObject *v)
{
Py_uhash_t hashreal, hashimag, combined;
- hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real);
+ hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real);
if (hashreal == (Py_uhash_t)-1)
return -1;
- hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag);
+ hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag);
if (hashimag == (Py_uhash_t)-1)
return -1;
/* Note: if the imaginary part is 0, hashimag is 0 now,
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index b3c41b1..7e78132 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
static Py_hash_t
float_hash(PyFloatObject *v)
{
- return _Py_HashDouble(v->ob_fval);
+ return _Py_HashDouble((PyObject *)v, v->ob_fval);
}
static PyObject *
diff --git a/Python/pyhash.c b/Python/pyhash.c
index 3b6c34e..f0c8235 100644
--- a/Python/pyhash.c
+++ b/Python/pyhash.c
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
- _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
- hashes of float and Decimal infinities and nans.
+ _PyHASH_INF and -_PyHASH_INF are also used for the
+ hashes of float and Decimal infinities.
+
+ NaNs hash with a pointer hash. Having distinct hash values prevents
+ catastrophic pileups from distinct NaN instances which used to always
+ have the same hash value but would compare unequal.
A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
*/
+Py_hash_t _Py_HashPointer(const void *);
+
Py_hash_t
-_Py_HashDouble(double v)
+_Py_HashDouble(PyObject *inst, double v)
{
int e, sign;
double m;
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else
- return _PyHASH_NAN;
+ return _Py_HashPointer(inst);
}
m = frexp(v, &e);
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index a36d90f..911c2d9 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_INF));
PyStructSequence_SET_ITEM(hash_info, field++,
- PyLong_FromLong(_PyHASH_NAN));
+ PyLong_FromLong(0)); // This is no longer used
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_IMAG));
PyStructSequence_SET_ITEM(hash_info, field++,