summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
committerChristian Heimes <christian@cheimes.de>2013-11-20 10:46:18 (GMT)
commit985ecdcfc29adfc36ce2339acf03f819ad414869 (patch)
tree06a11f82271e768dbe49469c8736b65b083f671c /Objects
parentfe32aec25a8b36498d840bd69485e9bc94195b9c (diff)
downloadcpython-985ecdcfc29adfc36ce2339acf03f819ad414869.zip
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.gz
cpython-985ecdcfc29adfc36ce2339acf03f819ad414869.tar.bz2
ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/bytesobject.c2
-rw-r--r--Objects/memoryobject.c2
-rw-r--r--Objects/object.c146
-rw-r--r--Objects/unicodeobject.c35
4 files changed, 4 insertions, 181 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index efa0192..8217b1e 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -897,7 +897,7 @@ bytes_hash(PyBytesObject *a)
{
if (a->ob_shash == -1) {
/* Can't fail */
- a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+ a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
}
return a->ob_shash;
}
diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c
index 1d52d9d..cb644b8 100644
--- a/Objects/memoryobject.c
+++ b/Objects/memoryobject.c
@@ -2742,7 +2742,7 @@ memory_hash(PyMemoryViewObject *self)
}
/* Can't fail */
- self->hash = _Py_HashBytes((unsigned char *)mem, view->len);
+ self->hash = _Py_HashBytes(mem, view->len);
if (mem != view->buf)
PyMem_Free(mem);
diff --git a/Objects/object.c b/Objects/object.c
index acc34af..395e28d 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -731,150 +731,6 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)
return ok;
}
-/* Set of hash utility functions to help maintaining the invariant that
- if a==b then hash(a)==hash(b)
-
- All the utility functions (_Py_Hash*()) return "-1" to signify an error.
-*/
-
-/* For numeric types, the hash of a number x is based on the reduction
- of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
- hash(x) == hash(y) whenever x and y are numerically equal, even if
- x and y have different types.
-
- A quick summary of the hashing strategy:
-
- (1) First define the 'reduction of x modulo P' for any rational
- number x; this is a standard extension of the usual notion of
- reduction modulo P for integers. If x == p/q (written in lowest
- terms), the reduction is interpreted as the reduction of p times
- the inverse of the reduction of q, all modulo P; if q is exactly
- divisible by P then define the reduction to be infinity. So we've
- got a well-defined map
-
- reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
-
- (2) Now for a rational number x, define hash(x) by:
-
- reduce(x) if x >= 0
- -reduce(-x) if x < 0
-
- If the result of the reduction is infinity (this is impossible for
- integers, floats and Decimals) then use the predefined hash value
- _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
- _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
- hashes of float and Decimal infinities and nans.
-
- A selling point for the above strategy is that it makes it possible
- to compute hashes of decimal and binary floating-point numbers
- efficiently, even if the exponent of the binary or decimal number
- is large. The key point is that
-
- reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
-
- provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
- binary or decimal float is never infinity, since the denominator is a power
- of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
- for nonnegative x,
-
- reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
-
- reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
-
- and reduce(10**e) can be computed efficiently by the usual modular
- exponentiation algorithm. For reduce(2**e) it's even better: since
- P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
- by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
-
- */
-
-Py_hash_t
-_Py_HashDouble(double v)
-{
- int e, sign;
- double m;
- Py_uhash_t x, y;
-
- if (!Py_IS_FINITE(v)) {
- if (Py_IS_INFINITY(v))
- return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
- else
- return _PyHASH_NAN;
- }
-
- m = frexp(v, &e);
-
- sign = 1;
- if (m < 0) {
- sign = -1;
- m = -m;
- }
-
- /* process 28 bits at a time; this should work well both for binary
- and hexadecimal floating point. */
- x = 0;
- while (m) {
- x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
- m *= 268435456.0; /* 2**28 */
- e -= 28;
- y = (Py_uhash_t)m; /* pull out integer part */
- m -= y;
- x += y;
- if (x >= _PyHASH_MODULUS)
- x -= _PyHASH_MODULUS;
- }
-
- /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
- e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
- x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
-
- x = x * sign;
- if (x == (Py_uhash_t)-1)
- x = (Py_uhash_t)-2;
- return (Py_hash_t)x;
-}
-
-Py_hash_t
-_Py_HashPointer(void *p)
-{
- Py_hash_t x;
- size_t y = (size_t)p;
- /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
- excessive hash collisions for dicts and sets */
- y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
- x = (Py_hash_t)y;
- if (x == -1)
- x = -2;
- return x;
-}
-
-Py_hash_t
-_Py_HashBytes(unsigned char *p, Py_ssize_t len)
-{
- Py_uhash_t x;
- Py_ssize_t i;
-
- /*
- We make the hash of the empty string be 0, rather than using
- (prefix ^ suffix), since this slightly obfuscates the hash secret
- */
-#ifdef Py_DEBUG
- assert(_Py_HashSecret_Initialized);
-#endif
- if (len == 0) {
- return 0;
- }
- x = (Py_uhash_t) _Py_HashSecret.prefix;
- x ^= (Py_uhash_t) *p << 7;
- for (i = 0; i < len; i++)
- x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
- x ^= (Py_uhash_t) len;
- x ^= (Py_uhash_t) _Py_HashSecret.suffix;
- if (x == -1)
- x = -2;
- return x;
-}
-
Py_hash_t
PyObject_HashNotImplemented(PyObject *v)
{
@@ -883,8 +739,6 @@ PyObject_HashNotImplemented(PyObject *v)
return -1;
}
-_Py_HashSecret_t _Py_HashSecret;
-
Py_hash_t
PyObject_Hash(PyObject *v)
{
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 880889e..3644db3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11386,39 +11386,8 @@ unicode_hash(PyObject *self)
_PyUnicode_HASH(self) = 0;
return 0;
}
-
- /* The hash function as a macro, gets expanded three times below. */
-#define HASH(P) \
- x ^= (Py_uhash_t) *P << 7; \
- while (--len >= 0) \
- x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *P++; \
-
- x = (Py_uhash_t) _Py_HashSecret.prefix;
- switch (PyUnicode_KIND(self)) {
- case PyUnicode_1BYTE_KIND: {
- const unsigned char *c = PyUnicode_1BYTE_DATA(self);
- HASH(c);
- break;
- }
- case PyUnicode_2BYTE_KIND: {
- const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
- HASH(s);
- break;
- }
- default: {
- Py_UCS4 *l;
- assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
- "Impossible switch case in unicode_hash");
- l = PyUnicode_4BYTE_DATA(self);
- HASH(l);
- break;
- }
- }
- x ^= (Py_uhash_t) PyUnicode_GET_LENGTH(self);
- x ^= (Py_uhash_t) _Py_HashSecret.suffix;
-
- if (x == -1)
- x = -2;
+ x = _Py_HashBytes(PyUnicode_DATA(self),
+ PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
_PyUnicode_HASH(self) = x;
return x;
}