From 13634cf7a407202777285172a57cfcddb20a2047 Mon Sep 17 00:00:00 2001 From: Fred Drake Date: Thu, 29 Jun 2000 19:17:04 +0000 Subject: This patch addresses two main issues: (1) There exist some non-fatal errors in some of the hash algorithms. For exmaple, in float_hash and complex_hash a certain part of the value is not included in the hash calculation. See Tim's, Guido's, and my discussion of this on python-dev in May under the title "fix float_hash and complex_hash for 64-bit *nix" (2) The hash algorithms that use pointers (e.g. func_hash, code_hash) are universally not correct on Win64 (they assume that sizeof(long) == sizeof(void*)) As well, this patch significantly cleans up the hash code. It adds the two function _Py_HashDouble and _PyHash_VoidPtr that the various hashing routine are changed to use. These help maintain the hash function invariant: (a==b) => (hash(a)==hash(b))) I have added Lib/test/test_hash.py and Lib/test/output/test_hash to test this for some cases. --- Include/object.h | 4 +++ Lib/test/output/test_hash | 1 + Lib/test/test_hash.py | 26 +++++++++++++++++++ Objects/classobject.c | 5 +--- Objects/complexobject.c | 37 +++++++--------------------- Objects/floatobject.c | 21 ++++++++-------- Objects/funcobject.c | 6 +++-- Objects/methodobject.c | 10 ++++++-- Objects/object.c | 63 +++++++++++++++++++++++++++++++++++++++++++++-- PC/_winreg.c | 2 +- 10 files changed, 126 insertions(+), 49 deletions(-) create mode 100644 Lib/test/output/test_hash create mode 100644 Lib/test/test_hash.py diff --git a/Include/object.h b/Include/object.h index 2250b7f..20757be 100644 --- a/Include/object.h +++ b/Include/object.h @@ -293,6 +293,10 @@ extern DL_IMPORT(void) Py_ReprLeave Py_PROTO((PyObject *)); /* tstate dict key for PyObject_Compare helper */ extern PyObject *_PyCompareState_Key; +/* Helpers for hash functions */ +extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double)); +extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*)); + /* Flag bits for printing: */ #define Py_PRINT_RAW 1 /* No string quotes etc. */ diff --git a/Lib/test/output/test_hash b/Lib/test/output/test_hash new file mode 100644 index 0000000..0141a1a --- /dev/null +++ b/Lib/test/output/test_hash @@ -0,0 +1 @@ +test_hash diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py new file mode 100644 index 0000000..51b4c33 --- /dev/null +++ b/Lib/test/test_hash.py @@ -0,0 +1,26 @@ +# test the invariant that +# iff a==b then hash(a)==hash(b) +# + +import test_support + + +def same_hash(*objlist): + # hash each object given an raise TestFailed if + # the hash values are not all the same + hashed = map(hash, objlist) + for h in hashed[1:]: + if h != hashed[0]: + raise TestFailed, "hashed values differ: %s" % `objlist` + + + +same_hash(1, 1L, 1.0, 1.0+0.0j) +same_hash(int(1), long(1), float(1), complex(1)) + +same_hash(long(1.23e300), float(1.23e300)) + +same_hash(float(0.5), complex(0.5, 0.0)) + + + diff --git a/Objects/classobject.c b/Objects/classobject.c index cd0bb1d..f1dabfe 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -864,10 +864,7 @@ instance_hash(inst) func = instance_getattr(inst, cmpstr); if (func == NULL) { PyErr_Clear(); - outcome = (long)inst; - if (outcome == -1) - outcome = -2; - return outcome; + return _Py_HashPointer(inst); } PyErr_SetString(PyExc_TypeError, "unhashable instance"); return -1; diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 42709ee..1a15a27 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -285,8 +285,7 @@ complex_hash(v) PyComplexObject *v; { double intpart, fractpart; - int expo; - long hipart, x; + long x; /* This is designed so that Python numbers with the same value hash to the same value, otherwise comparisons of mapping keys will turn out weird */ @@ -302,7 +301,7 @@ complex_hash(v) #endif if (fractpart == 0.0 && v->cval.imag == 0.0) { - if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) { + if (intpart > LONG_MAX || -intpart > LONG_MAX) { /* Convert to long int and use its hash... */ PyObject *w = PyLong_FromDouble(v->cval.real); if (w == NULL) @@ -314,36 +313,18 @@ complex_hash(v) x = (long)intpart; } else { - fractpart = frexp(fractpart, &expo); - fractpart = fractpart * 2147483648.0; /* 2**31 */ - hipart = (long)fractpart; /* Take the top 32 bits */ - fractpart = (fractpart - (double)hipart) * 2147483648.0; - /* Get the next 32 bits */ - x = hipart + (long)fractpart + (long)intpart + (expo << 15); - /* Combine everything */ + x = _Py_HashDouble(v->cval.real); + if (x == -1) + return -1; if (v->cval.imag != 0.0) { /* Hash the imaginary part */ /* XXX Note that this hashes complex(x, y) to the same value as complex(y, x). Still better than it used to be :-) */ -#ifdef MPW - { - extended e; - fractpart = modf(v->cval.imag, &e); - intpart = e; - } -#else - fractpart = modf(v->cval.imag, &intpart); -#endif - fractpart = frexp(fractpart, &expo); - fractpart = fractpart * 2147483648.0; /* 2**31 */ - hipart = (long)fractpart; /* Take the top 32 bits */ - fractpart = - (fractpart - (double)hipart) * 2147483648.0; - /* Get the next 32 bits */ - x ^= hipart + (long)fractpart + - (long)intpart + (expo << 15); - /* Combine everything */ + long y = _Py_HashDouble(v->cval.imag); + if (y == -1) + return -1; + x += y; } } if (x == -1) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 69b66b7..29ade28 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -59,7 +59,13 @@ PERFORMANCE OF THIS SOFTWARE. #endif #ifndef LONG_MAX +#if SIZEOF_LONG == 4 #define LONG_MAX 0X7FFFFFFFL +#elif SIZEOF_LONG == 8 +#define LONG_MAX 0X7FFFFFFFFFFFFFFFL +#else +#error "could not set LONG_MAX" +#endif #endif #ifndef LONG_MIN @@ -357,12 +363,12 @@ float_compare(v, w) return (i < j) ? -1 : (i > j) ? 1 : 0; } + static long float_hash(v) PyFloatObject *v; { double intpart, fractpart; - int expo; long x; /* This is designed so that Python numbers with the same value hash to the same value, otherwise comparisons @@ -379,7 +385,7 @@ float_hash(v) #endif if (fractpart == 0.0) { - if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) { + if (intpart > LONG_MAX || -intpart > LONG_MAX) { /* Convert to long int and use its hash... */ PyObject *w = PyLong_FromDouble(v->ob_fval); if (w == NULL) @@ -393,14 +399,9 @@ float_hash(v) else { /* Note -- if you change this code, also change the copy in complexobject.c */ - long hipart; - fractpart = frexp(fractpart, &expo); - fractpart = fractpart * 2147483648.0; /* 2**31 */ - hipart = (long)fractpart; /* Take the top 32 bits */ - fractpart = (fractpart - (double)hipart) * 2147483648.0; - /* Get the next 32 bits */ - x = hipart + (long)fractpart + (long)intpart + (expo << 15); - /* Combine everything */ + x = _Py_HashDouble(v->ob_fval); + if (x == -1) + return -1; } if (x == -1) x = -2; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index b976eab..d9ce027 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -231,10 +231,12 @@ static long func_hash(f) PyFunctionObject *f; { - long h; + long h,x; h = PyObject_Hash(f->func_code); if (h == -1) return h; - h = h ^ (long)f->func_globals; + x = _Py_HashPointer(f->func_globals); + if (x == -1) return x; + h ^= x; if (h == -1) h = -2; return h; } diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 8b67a87..580bb2f 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -172,7 +172,7 @@ static long meth_hash(a) PyCFunctionObject *a; { - long x; + long x,y; if (a->m_self == NULL) x = 0; else { @@ -180,7 +180,13 @@ meth_hash(a) if (x == -1) return -1; } - return x ^ (long) a->m_ml->ml_meth; + y = _Py_HashPointer(a->m_ml->ml_meth); + if (y == -1) + return -1; + x ^= y; + if (x == -1) + x = -2; + return x; } PyTypeObject PyCFunction_Type = { diff --git a/Objects/object.c b/Objects/object.c index 9a0e87c..9ed03b2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE. #include "Python.h" +#include "mymath.h" + /* just for trashcan: */ #include "compile.h" #include "frameobject.h" @@ -507,6 +509,62 @@ PyObject_Compare(v, w) return result; } + +/* Set of hash utility functions to help maintaining the invariant that + iff a==b then hash(a)==hash(b) + + All the utility functions (_Py_Hash*()) return "-1" to signify an error. +*/ + +long +_Py_HashDouble(v) + double v; +{ + /* Use frexp to get at the bits in the double. + * Since the VAX D double format has 56 mantissa bits, which is the + * most of any double format in use, each of these parts may have as + * many as (but no more than) 56 significant bits. + * So, assuming sizeof(long) >= 4, each part can be broken into two longs; + * frexp and multiplication are used to do that. + * Also, since the Cray double format has 15 exponent bits, which is the + * most of any double format in use, shifting the exponent field left by + * 15 won't overflow a long (again assuming sizeof(long) >= 4). + */ + int expo; + long hipart; + + v = frexp(v, &expo); + v = v * 2147483648.0; /* 2**31 */ + hipart = (long)v; /* Take the top 32 bits */ + v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */ + + return hipart + (long)v + (expo << 15); /* Combine everything */ +} + +long +_Py_HashPointer(p) + void *p; +{ +#if SIZEOF_LONG >= SIZEOF_VOID_P + return (long)p; +#else + /* convert to a Python long and hash that */ + PyObject* longobj; + long x; + + if ((longobj = PyLong_FromVoidPtr(p)) == NULL) { + x = -1; + goto finally; + } + x = PyObject_Hash(longobj); + +finally: + Py_XDECREF(longobj); + return x; +#endif +} + + long PyObject_Hash(v) PyObject *v; @@ -514,8 +572,9 @@ PyObject_Hash(v) PyTypeObject *tp = v->ob_type; if (tp->tp_hash != NULL) return (*tp->tp_hash)(v); - if (tp->tp_compare == NULL) - return (long) v; /* Use address as hash value */ + if (tp->tp_compare == NULL) { + return _Py_HashPointer(v); /* Use address as hash value */ + } /* If there's a cmp but no hash defined, the object can't be hashed */ PyErr_SetString(PyExc_TypeError, "unhashable type"); return -1; diff --git a/PC/_winreg.c b/PC/_winreg.c index f92476e..4539959 100644 --- a/PC/_winreg.c +++ b/PC/_winreg.c @@ -423,7 +423,7 @@ PyHKEY_hashFunc(PyObject *ob) /* Just use the address. XXX - should we use the handle value? */ - return (long)ob; + return _Py_HashPointer(ob); } -- cgit v0.12