From 57e683e53eed1455176b17304b3ac007ae7eb181 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 24 Sep 2011 18:18:40 +0100 Subject: Issue #1621: Fix undefined behaviour in bytes.__hash__, str.__hash__, tuple.__hash__, frozenset.__hash__ and set indexing operations. --- Objects/bytesobject.c | 8 ++++---- Objects/dictobject.c | 4 ++-- Objects/setobject.c | 20 ++++++++++---------- Objects/tupleobject.c | 9 +++++---- Objects/unicodeobject.c | 10 +++++----- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a286646..d7f9981 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -869,16 +869,16 @@ bytes_hash(PyBytesObject *a) { register Py_ssize_t len; register unsigned char *p; - register Py_hash_t x; + register Py_uhash_t x; if (a->ob_shash != -1) return a->ob_shash; len = Py_SIZE(a); p = (unsigned char *) a->ob_sval; - x = *p << 7; + x = (Py_uhash_t)*p << 7; while (--len >= 0) - x = (1000003*x) ^ *p++; - x ^= Py_SIZE(a); + x = (1000003U*x) ^ (Py_uhash_t)*p++; + x ^= (Py_uhash_t)Py_SIZE(a); if (x == -1) x = -2; a->ob_shash = x; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index cdc27ab..e76e508 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -418,7 +418,7 @@ lookdict_unicode(PyDictObject *mp, PyObject *key, register Py_hash_t hash) mp->ma_lookup = lookdict; return lookdict(mp, key, hash); } - i = hash & mask; + i = (size_t)hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; @@ -572,7 +572,7 @@ insertdict_clean(register PyDictObject *mp, PyObject *key, Py_hash_t hash, register PyDictEntry *ep; MAINTAIN_TRACKING(mp, key, value); - i = hash & mask; + i = (size_t)hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; diff --git a/Objects/setobject.c b/Objects/setobject.c index d1bad27..41df24d 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -77,7 +77,7 @@ NULL if the rich comparison returns an error. static setentry * set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -86,7 +86,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) register int cmp; PyObject *startkey; - i = hash & mask; + i = (size_t)hash & mask; entry = &table[i]; if (entry->key == NULL || entry->key == key) return entry; @@ -159,7 +159,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) static setentry * set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) { - register Py_ssize_t i; + register size_t i; register size_t perturb; register setentry *freeslot; register size_t mask = so->mask; @@ -174,7 +174,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) so->lookup = set_lookkey; return set_lookkey(so, key, hash); } - i = hash & mask; + i = (size_t)hash & mask; entry = &table[i]; if (entry->key == NULL || entry->key == key) return entry; @@ -256,7 +256,7 @@ set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash) setentry *table = so->table; register setentry *entry; - i = hash & mask; + i = (size_t)hash & mask; entry = &table[i]; for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; @@ -770,14 +770,14 @@ static Py_hash_t frozenset_hash(PyObject *self) { PySetObject *so = (PySetObject *)self; - Py_hash_t h, hash = 1927868237L; + Py_uhash_t h, hash = 1927868237U; setentry *entry; Py_ssize_t pos = 0; if (so->hash != -1) return so->hash; - hash *= PySet_GET_SIZE(self) + 1; + hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1; while (set_next(so, &pos, &entry)) { /* Work to increase the bit dispersion for closely spaced hash values. The is important because some use cases have many @@ -785,11 +785,11 @@ frozenset_hash(PyObject *self) hashes so that many distinct combinations collapse to only a handful of distinct hash values. */ h = entry->hash; - hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u; + hash ^= (h ^ (h << 16) ^ 89869747U) * 3644798167U; } - hash = hash * 69069L + 907133923L; + hash = hash * 69069U + 907133923U; if (hash == -1) - hash = 590923713L; + hash = 590923713U; so->hash = hash; return hash; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index ccfd281..ddb69e4 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -315,11 +315,12 @@ Done: static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_hash_t x, y; + register Py_uhash_t x; + register Py_hash_t y; register Py_ssize_t len = Py_SIZE(v); register PyObject **p; - Py_hash_t mult = 1000003L; - x = 0x345678L; + Py_uhash_t mult = 1000003; + x = 0x345678; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); @@ -330,7 +331,7 @@ tuplehash(PyTupleObject *v) mult += (Py_hash_t)(82520L + len + len); } x += 97531L; - if (x == -1) + if (x == (Py_uhash_t)-1) x = -2; return x; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8c2ce6a..a85bac8 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -7721,22 +7721,22 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index) } /* Believe it or not, this produces the same value for ASCII strings - as string_hash(). */ + as bytes_hash(). */ static Py_hash_t unicode_hash(PyUnicodeObject *self) { Py_ssize_t len; Py_UNICODE *p; - Py_hash_t x; + Py_uhash_t x; if (self->hash != -1) return self->hash; len = Py_SIZE(self); p = self->str; - x = *p << 7; + x = (Py_uhash_t)*p << 7; while (--len >= 0) - x = (1000003*x) ^ *p++; - x ^= Py_SIZE(self); + x = (1000003U*x) ^ (Py_uhash_t)*p++; + x ^= (Py_uhash_t)Py_SIZE(self); if (x == -1) x = -2; self->hash = x; -- cgit v0.12