summaryrefslogtreecommitdiffstats
path: root/Objects/object.c
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>2000-06-29 19:17:04 (GMT)
committerFred Drake <fdrake@acm.org>2000-06-29 19:17:04 (GMT)
commit13634cf7a407202777285172a57cfcddb20a2047 (patch)
tree6cb9ea5a8fbe00548224ee45e04ca3bc1daff715 /Objects/object.c
parentb46696c0ed640992b4524aab888a26a56d993142 (diff)
downloadcpython-13634cf7a407202777285172a57cfcddb20a2047.zip
cpython-13634cf7a407202777285172a57cfcddb20a2047.tar.gz
cpython-13634cf7a407202777285172a57cfcddb20a2047.tar.bz2
This patch addresses two main issues: (1) There exist some non-fatal
errors in some of the hash algorithms. For exmaple, in float_hash and complex_hash a certain part of the value is not included in the hash calculation. See Tim's, Guido's, and my discussion of this on python-dev in May under the title "fix float_hash and complex_hash for 64-bit *nix" (2) The hash algorithms that use pointers (e.g. func_hash, code_hash) are universally not correct on Win64 (they assume that sizeof(long) == sizeof(void*)) As well, this patch significantly cleans up the hash code. It adds the two function _Py_HashDouble and _PyHash_VoidPtr that the various hashing routine are changed to use. These help maintain the hash function invariant: (a==b) => (hash(a)==hash(b))) I have added Lib/test/test_hash.py and Lib/test/output/test_hash to test this for some cases.
Diffstat (limited to 'Objects/object.c')
-rw-r--r--Objects/object.c63
1 files changed, 61 insertions, 2 deletions
diff --git a/Objects/object.c b/Objects/object.c
index 9a0e87c..9ed03b2 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE.
#include "Python.h"
+#include "mymath.h"
+
/* just for trashcan: */
#include "compile.h"
#include "frameobject.h"
@@ -507,6 +509,62 @@ PyObject_Compare(v, w)
return result;
}
+
+/* Set of hash utility functions to help maintaining the invariant that
+ iff a==b then hash(a)==hash(b)
+
+ All the utility functions (_Py_Hash*()) return "-1" to signify an error.
+*/
+
+long
+_Py_HashDouble(v)
+ double v;
+{
+ /* Use frexp to get at the bits in the double.
+ * Since the VAX D double format has 56 mantissa bits, which is the
+ * most of any double format in use, each of these parts may have as
+ * many as (but no more than) 56 significant bits.
+ * So, assuming sizeof(long) >= 4, each part can be broken into two longs;
+ * frexp and multiplication are used to do that.
+ * Also, since the Cray double format has 15 exponent bits, which is the
+ * most of any double format in use, shifting the exponent field left by
+ * 15 won't overflow a long (again assuming sizeof(long) >= 4).
+ */
+ int expo;
+ long hipart;
+
+ v = frexp(v, &expo);
+ v = v * 2147483648.0; /* 2**31 */
+ hipart = (long)v; /* Take the top 32 bits */
+ v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
+
+ return hipart + (long)v + (expo << 15); /* Combine everything */
+}
+
+long
+_Py_HashPointer(p)
+ void *p;
+{
+#if SIZEOF_LONG >= SIZEOF_VOID_P
+ return (long)p;
+#else
+ /* convert to a Python long and hash that */
+ PyObject* longobj;
+ long x;
+
+ if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
+ x = -1;
+ goto finally;
+ }
+ x = PyObject_Hash(longobj);
+
+finally:
+ Py_XDECREF(longobj);
+ return x;
+#endif
+}
+
+
long
PyObject_Hash(v)
PyObject *v;
@@ -514,8 +572,9 @@ PyObject_Hash(v)
PyTypeObject *tp = v->ob_type;
if (tp->tp_hash != NULL)
return (*tp->tp_hash)(v);
- if (tp->tp_compare == NULL)
- return (long) v; /* Use address as hash value */
+ if (tp->tp_compare == NULL) {
+ return _Py_HashPointer(v); /* Use address as hash value */
+ }
/* If there's a cmp but no hash defined, the object can't be hashed */
PyErr_SetString(PyExc_TypeError, "unhashable type");
return -1;